/*
 * gitweb page residue (git.sesse.net — ffmpeg/blob — libavcodec/mpegvideo.c,
 * commit "b frame strategy 2") — wrapped in a comment so the file remains valid C.
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
/**
 * Build per-qscale quantizer multiplier tables for the encoder.
 *
 * For every qscale in [qmin, qmax] a table of 64 fixed-point reciprocals of
 * qscale * quant_matrix[] is written into qmat (and, for the generic-fdct
 * branch, into the 16-bit qmat16 used by the MMX quantizer), so quantization
 * can be done with multiplications instead of divisions.  Entries are laid
 * out in the fdct's coefficient order via dsp->idct_permutation.  Which
 * scaling is applied depends on the fdct in use: AAN-style fdcts leave a
 * postscale of aanscales[] in their output, which is folded into the
 * reciprocal here.
 *
 * @param qmat16 per-qscale {multiplier, rounding bias} pairs for the 16-bit
 *               quantizer; only filled in the generic-fdct branch
 * @param bias   rounding bias, in QUANT_BIAS_SHIFT fixed point
 * @param intra  if nonzero, the DC coefficient (i==0) is skipped in the
 *               overflow check below
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            /* fdct output is unscaled: plain reciprocal of the quantizer step */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* AAN fdct leaves aanscales[] (14-bit fixed point) in the
               coefficients, so divide it out of the reciprocal as well */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            /* generic fdct: also fill the 16-bit tables for the MMX quantizer */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* clamp so the multiplier fits in 16 bits and is never 0 */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* Find the smallest extra shift needed so that the largest possible
           coefficient times the multiplier still fits in 32 bits; intra mode
           skips DC (i starts at 'intra'). */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference implementations; the arch-specific init calls below may
       replace any of these with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* per-architecture overrides of the function pointers above */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) quantizer as the fast path even
       when trellis quantization is requested */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
/**
 * Copy frame metadata (and, when me_threshold is set, motion information)
 * from src to dst.  Pixel data is not touched.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    /* me_threshold means the application supplies pre-computed motion data;
       validate it and copy it over (errors are logged but not fatal here) */
    if(s->avctx->me_threshold){
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* one table per prediction direction (forward/backward) */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 *
 * @param shared if nonzero, pic->data[] is already provided by the caller
 *               and only marked as shared; otherwise the pixel buffers are
 *               obtained via avctx->get_buffer()
 * @return 0 on success, -1 on failure (partial allocations are released
 *         later through free_picture()/MPV_common_end())
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* a stride change mid-stream would invalidate all derived tables */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table==NULL means the side tables were never allocated for
       this Picture; allocate them all (CHECKED_ALLOCZ jumps to fail on OOM) */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        /* H.264 stores motion at 4x4 granularity, everything else at 8x8 */
        if(s->out_format == FMT_H264){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;
449     }
450 }
451
/**
 * Allocate the per-thread scratch buffers of an MpegEncContext
 * (edge emulation buffer, ME scratchpads, block storage).
 *
 * @param base unused here; kept for symmetry with the duplicate-context API
 * @return 0 on success, -1 on allocation failure (cleanup is deferred to
 *         MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the RD/B/OBMC scratchpads all alias the ME scratchpad (they are never
       live at the same time); obmc is offset by 16 bytes */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[] are per-block aliases into block[] */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=
488     s->b_scratchpad=
489     s->obmc_scratchpad= NULL;
490
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
/**
 * Set the given MpegEncContext to defaults for encoding; also performs the
 * one-time initialization of the shared MV-penalty / fcode tables.
 */
static void MPV_encode_defaults(MpegEncContext *s){
    /* NOTE(review): static 'done' guard is not thread-safe if two encoders
       are opened concurrently — confirm callers serialize init */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        /* NOTE(review): av_mallocz result is not checked; on OOM
           s->me.mv_penalty below becomes NULL */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers MVs in [-16, 15] */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
606 #endif //CONFIG_ENCODERS
607
608 /**
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 *
 * Derives all macroblock geometry from width/height, allocates every shared
 * table (via CHECKED_ALLOCZ, which jumps to fail on OOM), then sets up one
 * duplicate context per thread.
 *
 * @return 0 on success, -1 on error (everything allocated so far is freed
 *         through MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each thread needs at least one 16-pixel MB row to work on */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; the strides carry one extra column for the
       edge/error-resilience code */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to the strided xy position */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers skip the first row+column of the padded tables */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantizer matrices for all 32 qscales (see convert_matrix()) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 shares the main context; the others get full copies */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* per-thread scratch buffers and MB-row slice assignment */
    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free everything allocated by MPV_common_init(); common to encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* free the per-slice duplicate contexts; entry 0 is s itself
       (set in MPV_common_init), so its pointer must not be freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: the *_base pointers own the allocations,
       the plain pointers are offset aliases and are only cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field (interlaced) MV tables, same base/alias scheme */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction state (dc/ac values, coded block pattern) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    /* encoder side statistics and quantization tables */
    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* the picture pointers referenced entries of s->picture, which is gone now */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i;
891     int chroma_h_shift, chroma_v_shift;
892
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962         return -1;
963     }
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040         return -1;
1041     }
1042
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067
1068     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
1070         return -1;
1071     }
1072
1073     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074     if(i > 1){
1075         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076         avctx->time_base.den /= i;
1077         avctx->time_base.num /= i;
1078 //        return -1;
1079     }
1080
1081     if(s->codec_id==CODEC_ID_MJPEG){
1082         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083         s->inter_quant_bias= 0;
1084     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086         s->inter_quant_bias= 0;
1087     }else{
1088         s->intra_quant_bias=0;
1089         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1090     }
1091
1092     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093         s->intra_quant_bias= avctx->intra_quant_bias;
1094     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095         s->inter_quant_bias= avctx->inter_quant_bias;
1096
1097     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1098
1099     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101         return -1;
1102     }
1103     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1104
1105     switch(avctx->codec->id) {
1106     case CODEC_ID_MPEG1VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         break;
1111     case CODEC_ID_MPEG2VIDEO:
1112         s->out_format = FMT_MPEG1;
1113         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115         s->rtp_mode= 1;
1116         break;
1117     case CODEC_ID_LJPEG:
1118     case CODEC_ID_JPEGLS:
1119     case CODEC_ID_MJPEG:
1120         s->out_format = FMT_MJPEG;
1121         s->intra_only = 1; /* force intra only for jpeg */
1122         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1123         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1124         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1125         s->mjpeg_vsample[1] = 1;
1126         s->mjpeg_vsample[2] = 1;
1127         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1128         s->mjpeg_hsample[1] = 1;
1129         s->mjpeg_hsample[2] = 1;
1130         if (mjpeg_init(s) < 0)
1131             return -1;
1132         avctx->delay=0;
1133         s->low_delay=1;
1134         break;
1135     case CODEC_ID_H261:
1136         s->out_format = FMT_H261;
1137         avctx->delay=0;
1138         s->low_delay=1;
1139         break;
1140     case CODEC_ID_H263:
1141         if (h263_get_picture_format(s->width, s->height) == 7) {
1142             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1143             return -1;
1144         }
1145         s->out_format = FMT_H263;
1146         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1147         avctx->delay=0;
1148         s->low_delay=1;
1149         break;
1150     case CODEC_ID_H263P:
1151         s->out_format = FMT_H263;
1152         s->h263_plus = 1;
1153         /* Fx */
1154         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1155         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1156         s->modified_quant= s->h263_aic;
1157         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1158         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1159         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1160         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1161         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1162
1163         /* /Fx */
1164         /* These are just to be sure */
1165         avctx->delay=0;
1166         s->low_delay=1;
1167         break;
1168     case CODEC_ID_FLV1:
1169         s->out_format = FMT_H263;
1170         s->h263_flv = 2; /* format = 1; 11-bit codes */
1171         s->unrestricted_mv = 1;
1172         s->rtp_mode=0; /* don't allow GOB */
1173         avctx->delay=0;
1174         s->low_delay=1;
1175         break;
1176     case CODEC_ID_RV10:
1177         s->out_format = FMT_H263;
1178         avctx->delay=0;
1179         s->low_delay=1;
1180         break;
1181     case CODEC_ID_RV20:
1182         s->out_format = FMT_H263;
1183         avctx->delay=0;
1184         s->low_delay=1;
1185         s->modified_quant=1;
1186         s->h263_aic=1;
1187         s->h263_plus=1;
1188         s->loop_filter=1;
1189         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1190         break;
1191     case CODEC_ID_MPEG4:
1192         s->out_format = FMT_H263;
1193         s->h263_pred = 1;
1194         s->unrestricted_mv = 1;
1195         s->low_delay= s->max_b_frames ? 0 : 1;
1196         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1197         break;
1198     case CODEC_ID_MSMPEG4V1:
1199         s->out_format = FMT_H263;
1200         s->h263_msmpeg4 = 1;
1201         s->h263_pred = 1;
1202         s->unrestricted_mv = 1;
1203         s->msmpeg4_version= 1;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_MSMPEG4V2:
1208         s->out_format = FMT_H263;
1209         s->h263_msmpeg4 = 1;
1210         s->h263_pred = 1;
1211         s->unrestricted_mv = 1;
1212         s->msmpeg4_version= 2;
1213         avctx->delay=0;
1214         s->low_delay=1;
1215         break;
1216     case CODEC_ID_MSMPEG4V3:
1217         s->out_format = FMT_H263;
1218         s->h263_msmpeg4 = 1;
1219         s->h263_pred = 1;
1220         s->unrestricted_mv = 1;
1221         s->msmpeg4_version= 3;
1222         s->flipflop_rounding=1;
1223         avctx->delay=0;
1224         s->low_delay=1;
1225         break;
1226     case CODEC_ID_WMV1:
1227         s->out_format = FMT_H263;
1228         s->h263_msmpeg4 = 1;
1229         s->h263_pred = 1;
1230         s->unrestricted_mv = 1;
1231         s->msmpeg4_version= 4;
1232         s->flipflop_rounding=1;
1233         avctx->delay=0;
1234         s->low_delay=1;
1235         break;
1236     case CODEC_ID_WMV2:
1237         s->out_format = FMT_H263;
1238         s->h263_msmpeg4 = 1;
1239         s->h263_pred = 1;
1240         s->unrestricted_mv = 1;
1241         s->msmpeg4_version= 5;
1242         s->flipflop_rounding=1;
1243         avctx->delay=0;
1244         s->low_delay=1;
1245         break;
1246     default:
1247         return -1;
1248     }
1249
1250     avctx->has_b_frames= !s->low_delay;
1251
1252     s->encoding = 1;
1253
1254     /* init */
1255     if (MPV_common_init(s) < 0)
1256         return -1;
1257
1258     if(s->modified_quant)
1259         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1260     s->progressive_frame=
1261     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1262     s->quant_precision=5;
1263
1264     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1265     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1266
1267 #ifdef CONFIG_H261_ENCODER
1268     if (s->out_format == FMT_H261)
1269         ff_h261_encode_init(s);
1270 #endif
1271     if (s->out_format == FMT_H263)
1272         h263_encode_init(s);
1273     if(s->msmpeg4_version)
1274         ff_msmpeg4_encode_init(s);
1275     if (s->out_format == FMT_MPEG1)
1276         ff_mpeg1_encode_init(s);
1277
1278     /* init q matrix */
1279     for(i=0;i<64;i++) {
1280         int j= s->dsp.idct_permutation[i];
1281         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1282             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1283             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1284         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1285             s->intra_matrix[j] =
1286             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1287         }else
1288         { /* mpeg1/2 */
1289             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1290             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1291         }
1292         if(s->avctx->intra_matrix)
1293             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1294         if(s->avctx->inter_matrix)
1295             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1296     }
1297
1298     /* precompute matrix */
1299     /* for mjpeg, we do include qscale in the matrix */
1300     if (s->out_format != FMT_MJPEG) {
1301         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1302                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1303         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1304                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1305     }
1306
1307     if(ff_rate_control_init(s) < 0)
1308         return -1;
1309
1310     return 0;
1311 }
1312
/* uninitialize the encoder; counterpart of MPV_encode_init() */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    ff_rate_control_uninit(s);

    /* free the shared encoder/decoder state, then the mjpeg specific one */
    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1331
1332 #endif //CONFIG_ENCODERS
1333
1334 void init_rl(RLTable *rl, int use_static)
1335 {
1336     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1337     uint8_t index_run[MAX_RUN+1];
1338     int last, run, level, start, end, i;
1339
1340     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1341     if(use_static && rl->max_level[0])
1342         return;
1343
1344     /* compute max_level[], max_run[] and index_run[] */
1345     for(last=0;last<2;last++) {
1346         if (last == 0) {
1347             start = 0;
1348             end = rl->last;
1349         } else {
1350             start = rl->last;
1351             end = rl->n;
1352         }
1353
1354         memset(max_level, 0, MAX_RUN + 1);
1355         memset(max_run, 0, MAX_LEVEL + 1);
1356         memset(index_run, rl->n, MAX_RUN + 1);
1357         for(i=start;i<end;i++) {
1358             run = rl->table_run[i];
1359             level = rl->table_level[i];
1360             if (index_run[run] == rl->n)
1361                 index_run[run] = i;
1362             if (level > max_level[run])
1363                 max_level[run] = level;
1364             if (run > max_run[level])
1365                 max_run[level] = run;
1366         }
1367         if(use_static)
1368             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1369         else
1370             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1371         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1372         if(use_static)
1373             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1374         else
1375             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1376         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1377         if(use_static)
1378             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1379         else
1380             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1381         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1382     }
1383 }
1384
/* replicate the border pixels of a width x height image into an edge of
   w extra pixels on every side (buf points at the top-left image pixel,
   the surrounding edge area must already be allocated) */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *top    = buf;
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first/last row upwards/downwards, corners included */
    for(i=1; i<=w; i++) {
        memset(top    - i * wrap - w    , top[0]           , w);     /* top left */
        memcpy(top    - i * wrap        , top              , width); /* top */
        memset(top    - i * wrap + width, top[width-1]     , w);     /* top right */
        memset(bottom + i * wrap - w    , bottom[0]        , w);     /* bottom left */
        memcpy(bottom + i * wrap        , bottom           , width); /* bottom */
        memset(bottom + i * wrap + width, bottom[width-1]  , w);     /* bottom right */
    }

    /* replicate the left/right columns sideways */
    row = buf;
    for(i=0; i<height; i++) {
        memset(row - w    , row[0]      , w);
        memset(row + width, row[width-1], w);
        row += wrap;
    }
}
1413
1414 int ff_find_unused_picture(MpegEncContext *s, int shared){
1415     int i;
1416
1417     if(shared){
1418         for(i=0; i<MAX_PICTURE_COUNT; i++){
1419             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1420         }
1421     }else{
1422         for(i=0; i<MAX_PICTURE_COUNT; i++){
1423             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1424         }
1425         for(i=0; i<MAX_PICTURE_COUNT; i++){
1426             if(s->picture[i].data[0]==NULL) return i;
1427         }
1428     }
1429
1430     assert(0);
1431     return -1;
1432 }
1433
1434 static void update_noise_reduction(MpegEncContext *s){
1435     int intra, i;
1436
1437     for(intra=0; intra<2; intra++){
1438         if(s->dct_count[intra] > (1<<16)){
1439             for(i=0; i<64; i++){
1440                 s->dct_error_sum[intra][i] >>=1;
1441             }
1442             s->dct_count[intra] >>= 1;
1443         }
1444
1445         for(i=0; i<64; i++){
1446             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1447         }
1448     }
1449 }
1450
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded.
 * Allocates/selects the current picture, rotates the last/next picture
 * pointers and picks the dequantizers for the frame.
 * @return 0 on success, -1 if no picture buffer could be allocated
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither next nor last should not exist anymore */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames and dropable frames are never used as references */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    /* rotate the reference picture pointers; B frames do not update them */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a non-intra frame without a decoded reference: allocate a dummy one */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address the wanted field and double the strides */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1577
/* generic function for encode/decode called after a frame has been coded/decoded;
   draws the edge padding of reference frames, updates the picture type history
   and releases encoder side non-reference frames */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c(); // leave the FPU/MMX state clean for the caller

    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1627
1628 /**
1629  * draws an line from (ex, ey) -> (sx, sy).
1630  * @param w width of the image
1631  * @param h height of the image
1632  * @param stride stride/linesize of the image
1633  * @param color color of the arrow
1634  */
1635 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1636     int t, x, y, fr, f;
1637
1638     sx= clip(sx, 0, w-1);
1639     sy= clip(sy, 0, h-1);
1640     ex= clip(ex, 0, w-1);
1641     ey= clip(ey, 0, h-1);
1642
1643     buf[sy*stride + sx]+= color;
1644
1645     if(ABS(ex - sx) > ABS(ey - sy)){
1646         if(sx > ex){
1647             t=sx; sx=ex; ex=t;
1648             t=sy; sy=ey; ey=t;
1649         }
1650         buf+= sx + sy*stride;
1651         ex-= sx;
1652         f= ((ey-sy)<<16)/ex;
1653         for(x= 0; x <= ex; x++){
1654             y = (x*f)>>16;
1655             fr= (x*f)&0xFFFF;
1656             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1657             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1658         }
1659     }else{
1660         if(sy > ey){
1661             t=sx; sx=ex; ex=t;
1662             t=sy; sy=ey; ey=t;
1663         }
1664         buf+= sx + sy*stride;
1665         ey-= sy;
1666         if(ey) f= ((ex-sx)<<16)/ey;
1667         else   f= 0;
1668         for(y= 0; y <= ey; y++){
1669             x = (y*f)>>16;
1670             fr= (y*f)&0xFFFF;
1671             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1672             buf[y*stride + x+1]+= (color*         fr )>>16;;
1673         }
1674     }
1675 }
1676
1677 /**
1678  * draws an arrow from (ex, ey) -> (sx, sy).
1679  * @param w width of the image
1680  * @param h height of the image
1681  * @param stride stride/linesize of the image
1682  * @param color color of the arrow
1683  */
1684 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1685     int dx,dy;
1686
1687     sx= clip(sx, -100, w+100);
1688     sy= clip(sy, -100, h+100);
1689     ex= clip(ex, -100, w+100);
1690     ey= clip(ey, -100, h+100);
1691
1692     dx= ex - sx;
1693     dy= ey - sy;
1694
1695     if(dx*dx + dy*dy > 3*3){
1696         int rx=  dx + dy;
1697         int ry= -dx + dy;
1698         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1699
1700         //FIXME subpixel accuracy
1701         rx= ROUNDED_DIV(rx*3<<4, length);
1702         ry= ROUNDED_DIV(ry*3<<4, length);
1703
1704         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1705         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1706     }
1707     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1708 }
1709
1710 /**
1711  * prints debuging info for the given picture.
1712  */
1713 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1714
1715     if(!pict || !pict->mb_type) return;
1716
1717     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1718         int x,y;
1719
1720         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1721         switch (pict->pict_type) {
1722             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1723             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1724             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1725             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1726             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1727             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1728         }
1729         for(y=0; y<s->mb_height; y++){
1730             for(x=0; x<s->mb_width; x++){
1731                 if(s->avctx->debug&FF_DEBUG_SKIP){
1732                     int count= s->mbskip_table[x + y*s->mb_stride];
1733                     if(count>9) count=9;
1734                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1735                 }
1736                 if(s->avctx->debug&FF_DEBUG_QP){
1737                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1738                 }
1739                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1740                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1741                     //Type & MV direction
1742                     if(IS_PCM(mb_type))
1743                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1744                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1745                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1746                     else if(IS_INTRA4x4(mb_type))
1747                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1748                     else if(IS_INTRA16x16(mb_type))
1749                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1750                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1751                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1752                     else if(IS_DIRECT(mb_type))
1753                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1754                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1755                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1756                     else if(IS_GMC(mb_type))
1757                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1758                     else if(IS_SKIP(mb_type))
1759                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1760                     else if(!USES_LIST(mb_type, 1))
1761                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1762                     else if(!USES_LIST(mb_type, 0))
1763                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1764                     else{
1765                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1766                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1767                     }
1768
1769                     //segmentation
1770                     if(IS_8X8(mb_type))
1771                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1772                     else if(IS_16X8(mb_type))
1773                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1774                     else if(IS_8X16(mb_type))
1775                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1776                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1777                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1778                     else
1779                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1780
1781
1782                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1783                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1784                     else
1785                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1786                 }
1787 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1788             }
1789             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1790         }
1791     }
1792
1793     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1794         const int shift= 1 + s->quarter_sample;
1795         int mb_y;
1796         uint8_t *ptr;
1797         int i;
1798         int h_chroma_shift, v_chroma_shift;
1799         const int width = s->avctx->width;
1800         const int height= s->avctx->height;
1801         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1802         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1803         s->low_delay=0; //needed to see the vectors without trashing the buffers
1804
1805         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1806         for(i=0; i<3; i++){
1807             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1808             pict->data[i]= s->visualization_buffer[i];
1809         }
1810         pict->type= FF_BUFFER_TYPE_COPY;
1811         ptr= pict->data[0];
1812
1813         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1814             int mb_x;
1815             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1816                 const int mb_index= mb_x + mb_y*s->mb_stride;
1817                 if((s->avctx->debug_mv) && pict->motion_val){
1818                   int type;
1819                   for(type=0; type<3; type++){
1820                     int direction = 0;
1821                     switch (type) {
1822                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1823                                 continue;
1824                               direction = 0;
1825                               break;
1826                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1827                                 continue;
1828                               direction = 0;
1829                               break;
1830                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1831                                 continue;
1832                               direction = 1;
1833                               break;
1834                     }
1835                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1836                         continue;
1837
1838                     if(IS_8X8(pict->mb_type[mb_index])){
1839                       int i;
1840                       for(i=0; i<4; i++){
1841                         int sx= mb_x*16 + 4 + 8*(i&1);
1842                         int sy= mb_y*16 + 4 + 8*(i>>1);
1843                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1844                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1845                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1846                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1847                       }
1848                     }else if(IS_16X8(pict->mb_type[mb_index])){
1849                       int i;
1850                       for(i=0; i<2; i++){
1851                         int sx=mb_x*16 + 8;
1852                         int sy=mb_y*16 + 4 + 8*i;
1853                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1854                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1855                         int my=(pict->motion_val[direction][xy][1]>>shift);
1856
1857                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1858                             my*=2;
1859
1860                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1861                       }
1862                     }else if(IS_8X16(pict->mb_type[mb_index])){
1863                       int i;
1864                       for(i=0; i<2; i++){
1865                         int sx=mb_x*16 + 4 + 8*i;
1866                         int sy=mb_y*16 + 8;
1867                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1868                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1869                         int my=(pict->motion_val[direction][xy][1]>>shift);
1870
1871                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1872                             my*=2;
1873
1874                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1875                       }
1876                     }else{
1877                       int sx= mb_x*16 + 8;
1878                       int sy= mb_y*16 + 8;
1879                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1880                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1881                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1882                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1883                     }
1884                   }
1885                 }
1886                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1887                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1888                     int y;
1889                     for(y=0; y<8; y++){
1890                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1891                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1892                     }
1893                 }
1894                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1895                     int mb_type= pict->mb_type[mb_index];
1896                     uint64_t u,v;
1897                     int y;
1898 #define COLOR(theta, r)\
1899 u= (int)(128 + r*cos(theta*3.141592/180));\
1900 v= (int)(128 + r*sin(theta*3.141592/180));
1901
1902
1903                     u=v=128;
1904                     if(IS_PCM(mb_type)){
1905                         COLOR(120,48)
1906                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1907                         COLOR(30,48)
1908                     }else if(IS_INTRA4x4(mb_type)){
1909                         COLOR(90,48)
1910                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1911 //                        COLOR(120,48)
1912                     }else if(IS_DIRECT(mb_type)){
1913                         COLOR(150,48)
1914                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1915                         COLOR(170,48)
1916                     }else if(IS_GMC(mb_type)){
1917                         COLOR(190,48)
1918                     }else if(IS_SKIP(mb_type)){
1919 //                        COLOR(180,48)
1920                     }else if(!USES_LIST(mb_type, 1)){
1921                         COLOR(240,48)
1922                     }else if(!USES_LIST(mb_type, 0)){
1923                         COLOR(0,48)
1924                     }else{
1925                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1926                         COLOR(300,48)
1927                     }
1928
1929                     u*= 0x0101010101010101ULL;
1930                     v*= 0x0101010101010101ULL;
1931                     for(y=0; y<8; y++){
1932                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1933                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1934                     }
1935
1936                     //segmentation
1937                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1938                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1939                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1940                     }
1941                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1942                         for(y=0; y<16; y++)
1943                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1944                     }
1945                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1946                         int dm= 1 << (mv_sample_log2-2);
1947                         for(i=0; i<4; i++){
1948                             int sx= mb_x*16 + 8*(i&1);
1949                             int sy= mb_y*16 + 8*(i>>1);
1950                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1951                             //FIXME bidir
1952                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1953                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1954                                 for(y=0; y<8; y++)
1955                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1956                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1957                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1958                         }
1959                     }
1960
1961                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1962                         // hmm
1963                     }
1964                 }
1965                 s->mbskip_table[mb_index]=0;
1966             }
1967         }
1968     }
1969 }
1970
1971 #ifdef CONFIG_ENCODERS
1972
/**
 * Returns the sum of absolute differences between the pixels of a 16x16
 * block and a constant reference value (used as a flatness/intra measure).
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int col, row;
    int sum= 0;

    for(row= 0; row<16; row++){
        for(col= 0; col<16; col++){
            const int diff= src[col + row*stride] - ref;
            sum += diff < 0 ? -diff : diff;
        }
    }

    return sum;
}
1985
1986 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1987     int x, y, w, h;
1988     int acc=0;
1989
1990     w= s->width &~15;
1991     h= s->height&~15;
1992
1993     for(y=0; y<h; y+=16){
1994         for(x=0; x<w; x+=16){
1995             int offset= x + y*stride;
1996             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1997             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1998             int sae = get_sae(src + offset, mean, stride);
1999
2000             acc+= sae + 500 < sad;
2001         }
2002     }
2003     return acc;
2004 }
2005
2006
/**
 * Queues a user-supplied frame into the encoder's input picture FIFO.
 * Validates/derives the pts, then either references the user's buffers
 * directly (when strides match and the input may be preserved) or copies
 * the pixel data into an internally allocated picture.
 * @param pic_arg input frame, or NULL to flush delayed frames
 * @return 0 on success, -1 on invalid (non-monotone) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;  /* 1 -> reference user buffers instead of copying */

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly monotone */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: guess last+1, or fall back to the display number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct mode requires preserved input buffers and matching strides */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* take over the caller's data pointers; no pixel copy needed */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        int offset= 16;  /* edge offset of internally allocated pictures */
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + offset == pic_arg->data[0]
           && pic->data[1] + offset == pic_arg->data[1]
           && pic->data[2] + offset == pic_arg->data[2]){
       // empty
        }else{
            /* copy plane by plane, honoring possibly different strides */
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + offset;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2110
2111 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2112     int x, y, plane;
2113     int score=0;
2114     int64_t score64=0;
2115
2116     for(plane=0; plane<3; plane++){
2117         const int stride= p->linesize[plane];
2118         const int bw= plane ? 1 : 2;
2119         for(y=0; y<s->mb_height*bw; y++){
2120             for(x=0; x<s->mb_width*bw; x++){
2121                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2122                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2123
2124                 switch(s->avctx->frame_skip_exp){
2125                     case 0: score= FFMAX(score, v); break;
2126                     case 1: score+= ABS(v);break;
2127                     case 2: score+= v*v;break;
2128                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2129                     case 4: score64+= v*v*(int64_t)(v*v);break;
2130                 }
2131             }
2132         }
2133     }
2134
2135     if(score) score64= score;
2136
2137     if(score64 < s->avctx->frame_skip_threshold)
2138         return 1;
2139     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2140         return 1;
2141     return 0;
2142 }
2143
2144 static int estimate_best_b_count(MpegEncContext *s){
2145     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2146     AVCodecContext *c= avcodec_alloc_context();
2147     AVFrame input[FF_MAX_B_FRAMES+2];
2148     const int scale= 0;
2149     int i, j, out_size;
2150     int outbuf_size= (s->width * s->height) >> (2*scale); //FIXME
2151     uint8_t *outbuf= av_malloc(outbuf_size);
2152     ImgReSampleContext *resample;
2153     int64_t best_rd= INT64_MAX;
2154     int best_b_count= -1;
2155     const int lambda2= s->lambda2;
2156
2157     c->width = s->width >> scale;
2158     c->height= s->height>> scale;
2159     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2160     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2161     c->mb_decision= s->avctx->mb_decision;
2162     c->me_cmp= s->avctx->me_cmp;
2163     c->mb_cmp= s->avctx->mb_cmp;
2164     c->me_sub_cmp= s->avctx->me_sub_cmp;
2165     c->pix_fmt = PIX_FMT_YUV420P;
2166     c->time_base= s->avctx->time_base;
2167     c->max_b_frames= s->max_b_frames;
2168
2169     if (avcodec_open(c, codec) < 0)
2170         return -1;
2171
2172     resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
2173
2174     for(i=0; i<s->max_b_frames+2; i++){
2175         int ysize= c->width*c->height;
2176         int csize= (c->width/2)*(c->height/2);
2177
2178         avcodec_get_frame_defaults(&input[i]);
2179         input[i].data[0]= av_malloc(ysize + 2*csize);
2180         input[i].data[1]= input[i].data[0] + ysize;
2181         input[i].data[2]= input[i].data[1] + csize;
2182         input[i].linesize[0]= c->width;
2183         input[i].linesize[1]=
2184         input[i].linesize[2]= c->width/2;
2185
2186         if(!i || s->input_picture[i-1])
2187             img_resample(resample, &input[i], i ? s->input_picture[i-1] : s->next_picture_ptr);
2188     }
2189
2190     for(j=0; j<s->max_b_frames+1; j++){
2191         int64_t rd=0;
2192
2193         if(!s->input_picture[j])
2194             break;
2195
2196         c->error[0]= c->error[1]= c->error[2]= 0;
2197
2198         input[0].pict_type= I_TYPE;
2199         input[0].quality= 2 * FF_QP2LAMBDA;
2200         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2201
2202         for(i=0; i<s->max_b_frames+1; i++){
2203             int is_p= i % (j+1) == j || i==s->max_b_frames;
2204
2205             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2206             input[i+1].quality= s->rc_context.last_qscale_for[input[i+1].pict_type];
2207             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2208             rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2209         }
2210
2211         /* get the delayed frames */
2212         while(out_size){
2213             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2214             rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2215         }
2216
2217         rd += c->error[0] + c->error[1] + c->error[2];
2218
2219         if(rd < best_rd){
2220             best_rd= rd;
2221             best_b_count= j;
2222         }
2223     }
2224
2225     av_freep(&outbuf);
2226     avcodec_close(c);
2227     av_freep(&c);
2228     img_resample_close(resample);
2229
2230     for(i=0; i<s->max_b_frames+2; i++){
2231         av_freep(&input[i].data[0]);
2232     }
2233
2234     return best_b_count;
2235 }
2236
2237 static void select_input_picture(MpegEncContext *s){
2238     int i;
2239
2240     for(i=1; i<MAX_PICTURE_COUNT; i++)
2241         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2242     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2243
2244     /* set next picture type & ordering */
2245     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2246         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2247             s->reordered_input_picture[0]= s->input_picture[0];
2248             s->reordered_input_picture[0]->pict_type= I_TYPE;
2249             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2250         }else{
2251             int b_frames;
2252
2253             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2254                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2255                 //FIXME check that te gop check above is +-1 correct
2256 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2257
2258                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2259                         for(i=0; i<4; i++)
2260                             s->input_picture[0]->data[i]= NULL;
2261                         s->input_picture[0]->type= 0;
2262                     }else{
2263                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2264                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2265
2266                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2267                     }
2268
2269                     emms_c();
2270                     ff_vbv_update(s, 0);
2271
2272                     goto no_output_pic;
2273                 }
2274             }
2275
2276             if(s->flags&CODEC_FLAG_PASS2){
2277                 for(i=0; i<s->max_b_frames+1; i++){
2278                     int pict_num= s->input_picture[0]->display_picture_number + i;
2279
2280                     if(pict_num >= s->rc_context.num_entries)
2281                         break;
2282                     if(!s->input_picture[i]){
2283                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2284                         break;
2285                     }
2286
2287                     s->input_picture[i]->pict_type=
2288                         s->rc_context.entry[pict_num].new_pict_type;
2289                 }
2290             }
2291
2292             if(s->avctx->b_frame_strategy==0){
2293                 b_frames= s->max_b_frames;
2294                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2295             }else if(s->avctx->b_frame_strategy==1){
2296                 for(i=1; i<s->max_b_frames+1; i++){
2297                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2298                         s->input_picture[i]->b_frame_score=
2299                             get_intra_count(s, s->input_picture[i  ]->data[0],
2300                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2301                     }
2302                 }
2303                 for(i=0; i<s->max_b_frames+1; i++){
2304                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2305                 }
2306
2307                 b_frames= FFMAX(0, i-1);
2308
2309                 /* reset scores */
2310                 for(i=0; i<b_frames+1; i++){
2311                     s->input_picture[i]->b_frame_score=0;
2312                 }
2313             }else if(s->avctx->b_frame_strategy==2){
2314                 b_frames= estimate_best_b_count(s);
2315             }else{
2316                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2317                 b_frames=0;
2318             }
2319
2320             emms_c();
2321 //static int b_count=0;
2322 //b_count+= b_frames;
2323 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2324
2325             for(i= b_frames - 1; i>=0; i--){
2326                 int type= s->input_picture[i]->pict_type;
2327                 if(type && type != B_TYPE)
2328                     b_frames= i;
2329             }
2330             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2331                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2332             }
2333
2334             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2335               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2336                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2337               }else{
2338                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2339                     b_frames=0;
2340                 s->input_picture[b_frames]->pict_type= I_TYPE;
2341               }
2342             }
2343
2344             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2345                && b_frames
2346                && s->input_picture[b_frames]->pict_type== I_TYPE)
2347                 b_frames--;
2348
2349             s->reordered_input_picture[0]= s->input_picture[b_frames];
2350             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2351                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2352             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2353             for(i=0; i<b_frames; i++){
2354                 s->reordered_input_picture[i+1]= s->input_picture[i];
2355                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2356                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2357             }
2358         }
2359     }
2360 no_output_pic:
2361     if(s->reordered_input_picture[0]){
2362         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2363
2364         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2365
2366         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2367             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2368
2369             int i= ff_find_unused_picture(s, 0);
2370             Picture *pic= &s->picture[i];
2371
2372             /* mark us unused / free shared pic */
2373             for(i=0; i<4; i++)
2374                 s->reordered_input_picture[0]->data[i]= NULL;
2375             s->reordered_input_picture[0]->type= 0;
2376
2377             pic->reference              = s->reordered_input_picture[0]->reference;
2378
2379             alloc_picture(s, pic, 0);
2380
2381             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2382
2383             s->current_picture_ptr= pic;
2384         }else{
2385             // input is not a shared pix -> reuse buffer for current_pix
2386
2387             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2388                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2389
2390             s->current_picture_ptr= s->reordered_input_picture[0];
2391             for(i=0; i<4; i++){
2392                 s->new_picture.data[i]+=16;
2393             }
2394         }
2395         copy_picture(&s->current_picture, s->current_picture_ptr);
2396
2397         s->picture_number= s->new_picture.display_picture_number;
2398 //printf("dpn:%d\n", s->picture_number);
2399     }else{
2400        memset(&s->new_picture, 0, sizeof(Picture));
2401     }
2402 }
2403
/**
 * Top-level entry point for encoding one video frame.
 *
 * Splits the output buffer between the slice threads, queues/reorders the
 * input picture, encodes it, copies the bit statistics into the
 * AVCodecContext, appends VBV stuffing if rate control requests it and
 * patches the MPEG-1/2 vbv_delay field for CBR streams.
 *
 * @param avctx    codec context (priv_data must be an MpegEncContext)
 * @param buf      output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame (may yield no output while B-frame
 *                 reordering delays pictures)
 * @return number of bytes written to buf, 0 if no picture was output,
 *         or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* give each slice thread a slice of the output buffer proportional to
       its share of macroblock rows */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* picks the picture to code next and decides the I/P/B pattern */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame bit statistics to the public context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* rate control may demand stuffing bytes to keep the VBV buffer from
           overflowing; the pattern is codec specific */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: plain zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing is a 0x1C3 start code followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            /* vbv_delay is in 90kHz clock ticks; rewrite the 16 bits that
               were reserved for it in the already-written picture header
               via the vbv_delay_ptr byte pointer */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture output this call (encoder delay) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2525
2526 #endif //CONFIG_ENCODERS
2527
/**
 * Global motion compensation, single-vector fast path ("gmc1").
 * Predicts one 16x16 luma macroblock and (unless gray-only decoding is
 * requested) both 8x8 chroma blocks from s->sprite_offset, falling back to
 * plain put_pixels when the sub-pel fraction is zero.
 *
 * @param s           codec context (reads sprite_offset, sprite_warping_accuracy,
 *                    mb_x/mb_y, no_rounding, edge positions)
 * @param dest_y/cb/cr destination planes for the current macroblock
 * @param ref_picture  source planes (Y, Cb, Cr)
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* integer part of the sprite offset selects the source position;
       the remainder is rescaled to 1/16-pel for the gmc1 filter */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clamp to the picture; at the far edge drop the sub-pel part so the
       filter does not read past the last column/row */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source needed for the 16x16 sub-pel interpolation */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* fractional offset below 1/2 pel granularity: use the gmc1 filter
           on both 8x16 halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* only half-pel components left: plain (no-)rounding copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same scheme at half resolution with the chroma sprite offset */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    /* Cr uses the same offset/emulation decision as Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2613
2614 static inline void gmc_motion(MpegEncContext *s,
2615                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2616                                uint8_t **ref_picture)
2617 {
2618     uint8_t *ptr;
2619     int linesize, uvlinesize;
2620     const int a= s->sprite_warping_accuracy;
2621     int ox, oy;
2622
2623     linesize = s->linesize;
2624     uvlinesize = s->uvlinesize;
2625
2626     ptr = ref_picture[0];
2627
2628     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2629     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2630
2631     s->dsp.gmc(dest_y, ptr, linesize, 16,
2632            ox,
2633            oy,
2634            s->sprite_delta[0][0], s->sprite_delta[0][1],
2635            s->sprite_delta[1][0], s->sprite_delta[1][1],
2636            a+1, (1<<(2*a+1)) - s->no_rounding,
2637            s->h_edge_pos, s->v_edge_pos);
2638     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2639            ox + s->sprite_delta[0][0]*8,
2640            oy + s->sprite_delta[1][0]*8,
2641            s->sprite_delta[0][0], s->sprite_delta[0][1],
2642            s->sprite_delta[1][0], s->sprite_delta[1][1],
2643            a+1, (1<<(2*a+1)) - s->no_rounding,
2644            s->h_edge_pos, s->v_edge_pos);
2645
2646     if(s->flags&CODEC_FLAG_GRAY) return;
2647
2648     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2649     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2650
2651     ptr = ref_picture[1];
2652     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2653            ox,
2654            oy,
2655            s->sprite_delta[0][0], s->sprite_delta[0][1],
2656            s->sprite_delta[1][0], s->sprite_delta[1][1],
2657            a+1, (1<<(2*a+1)) - s->no_rounding,
2658            s->h_edge_pos>>1, s->v_edge_pos>>1);
2659
2660     ptr = ref_picture[2];
2661     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2662            ox,
2663            oy,
2664            s->sprite_delta[0][0], s->sprite_delta[0][1],
2665            s->sprite_delta[1][0], s->sprite_delta[1][1],
2666            a+1, (1<<(2*a+1)) - s->no_rounding,
2667            s->h_edge_pos>>1, s->v_edge_pos>>1);
2668 }
2669
2670 /**
2671  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
2672  * @param buf destination buffer
2673  * @param src source buffer
2674  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2675  * @param block_w width of block
2676  * @param block_h height of block
2677  * @param src_x x coordinate of the top left sample of the block in the source buffer
2678  * @param src_y y coordinate of the top left sample of the block in the source buffer
2679  * @param w width of the source buffer
2680  * @param h height of the source buffer
2681  */
2682 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2683                                     int src_x, int src_y, int w, int h){
2684     int x, y;
2685     int start_y, start_x, end_y, end_x;
2686
2687     if(src_y>= h){
2688         src+= (h-1-src_y)*linesize;
2689         src_y=h-1;
2690     }else if(src_y<=-block_h){
2691         src+= (1-block_h-src_y)*linesize;
2692         src_y=1-block_h;
2693     }
2694     if(src_x>= w){
2695         src+= (w-1-src_x);
2696         src_x=w-1;
2697     }else if(src_x<=-block_w){
2698         src+= (1-block_w-src_x);
2699         src_x=1-block_w;
2700     }
2701
2702     start_y= FFMAX(0, -src_y);
2703     start_x= FFMAX(0, -src_x);
2704     end_y= FFMIN(block_h, h-src_y);
2705     end_x= FFMIN(block_w, w-src_x);
2706
2707     // copy existing part
2708     for(y=start_y; y<end_y; y++){
2709         for(x=start_x; x<end_x; x++){
2710             buf[x + y*linesize]= src[x + y*linesize];
2711         }
2712     }
2713
2714     //top
2715     for(y=0; y<start_y; y++){
2716         for(x=start_x; x<end_x; x++){
2717             buf[x + y*linesize]= buf[x + start_y*linesize];
2718         }
2719     }
2720
2721     //bottom
2722     for(y=end_y; y<block_h; y++){
2723         for(x=start_x; x<end_x; x++){
2724             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2725         }
2726     }
2727
2728     for(y=0; y<block_h; y++){
2729        //left
2730         for(x=0; x<start_x; x++){
2731             buf[x + y*linesize]= buf[start_x + y*linesize];
2732         }
2733
2734        //right
2735         for(x=end_x; x<block_w; x++){
2736             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2737         }
2738     }
2739 }
2740
/**
 * Half-pel motion compensation of one w x h block.
 * Clamps the source position, optionally routes the read through the
 * edge-emulation buffer, then applies the selected half-pel operation.
 *
 * @param dest/src  destination / reference plane pointers
 * @param field_select select the bottom field of the source (adds one line)
 * @param src_x,src_y  block position before applying the motion vector
 * @param pix_op    table of 4 ops indexed by the half-pel fraction dxy
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int dxy;
    int emu=0;

    /* dxy: bit0 = horizontal half-pel, bit1 = vertical half-pel */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do no forget half pels */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    src += src_y * stride + src_x;

    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
            /* NOTE(review): the bounds check uses the v_edge_pos parameter
               while the emulation call passes s->v_edge_pos together with
               src_y<<field_based (frame coordinates) — looks intentional
               but confirm for field-based callers */
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src= s->edge_emu_buffer;
            emu=1;
        }
    }
    if(field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return emu;
}
2780
/**
 * Low-resolution motion compensation of one w x h block, lowres variant of
 * hpel_motion(). The sub-pel fraction is kept at (2<<lowres) granularity and
 * rescaled to the 1/8-pel positions expected by the h264 chroma MC ops.
 *
 * @param pix_op table of h264-style chroma MC functions indexed by lowres
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;  /* mask of the sub-pel fraction at this lowres level */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        /* NOTE(review): passes s->linesize as the emulation stride even
           though the block was addressed with the stride parameter —
           assumes stride == s->linesize for all emulated callers; confirm */
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the fraction to the fixed 1/8-pel scale of the MC functions */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2822
/* apply one mpeg motion vector to the three components */
/**
 * Motion-compensates one macroblock (luma + both chroma planes) from a
 * single half-pel motion vector, handling frame and field prediction and
 * the per-format chroma vector derivation (H.263, H.261, 420/422/444).
 *
 * @param field_based  1 for field prediction: strides are doubled and the
 *                     vertical positions are in field coordinates
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param pix_op       [chroma_x_shift][dxy]-indexed put/avg pixel ops
 * @param h            height of the predicted region in lines
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* dxy: bit0 = horizontal half-pel, bit1 = vertical half-pel */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma source position and half-pel code per output format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: round the chroma vector like hpel */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma is subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* vector reaches outside the padded reference: emulate the edge
       (MPEG-1/2 forbids such vectors, so just warn and bail out there) */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2939
/* apply one mpeg motion vector to the three components */
/**
 * Low-resolution counterpart of mpeg_motion(): motion-compensates one
 * macroblock at 1/(1<<lowres) resolution using the h264-style chroma MC
 * functions, which take the sub-pel fraction as explicit sx/sy arguments.
 *
 * @param pix_op  h264_chroma_mc_func table indexed by block-size level
 * @param h       height of the predicted region in lines (already lowres-scaled)
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;        /* half the lowres luma block size */
    const int s_mask= (2<<lowres)-1;     /* sub-pel fraction mask */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive chroma position/fraction per output format (cf. mpeg_motion) */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale fractions to the 1/8-pel scale of the MC functions;
       NOTE(review): pix_op[lowres-1] for luma presumes lowres >= 1 on this
       path — confirm callers never reach here with lowres == 0 */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3036
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation blend of one 8x8 block.
 * dst[x] = (t*top + l*left + m*mid + r*right + b*bottom + 4) >> 3 with
 * position-dependent weights that always sum to 8. OBMC_FILTER4 fills a
 * 2x2 group with the same weights, which is why some rows below only list
 * their outermost columns explicitly.
 *
 * @param dst    destination block (stride-spaced)
 * @param src    the five predictions: [0]=mid, [1]=top, [2]=left, [3]=right, [4]=bottom
 * @param stride line size of dst and all src blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3088
3089 /* obmc for 1 8x8 luma block */
3090 static inline void obmc_motion(MpegEncContext *s,
3091                                uint8_t *dest, uint8_t *src,
3092                                int src_x, int src_y,
3093                                op_pixels_func *pix_op,
3094                                int16_t mv[5][2]/* mid top left right bottom*/)
3095 #define MID    0
3096 {
3097     int i;
3098     uint8_t *ptr[5];
3099
3100     assert(s->quarter_sample==0);
3101
3102     for(i=0; i<5; i++){
3103         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3104             ptr[i]= ptr[MID];
3105         }else{
3106             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3107             hpel_motion(s, ptr[i], src, 0, 0,
3108                         src_x, src_y,
3109                         s->width, s->height, s->linesize,
3110                         s->h_edge_pos, s->v_edge_pos,
3111                         8, 8, pix_op,
3112                         mv[i][0], mv[i][1]);
3113         }
3114     }
3115
3116     put_obmc(dest, ptr, s->linesize);
3117 }
3118
/**
 * Quarter-pel motion compensation of one macroblock, frame or field based.
 * Luma is interpolated at quarter-pel precision via qpix_op; chroma is
 * derived from the luma MV (halved twice, with optional encoder-bug
 * workarounds) and interpolated at half-pel precision via pix_op.
 * @param field_based 1 for field prediction (halves vertical extents)
 * @param bottom_field write to the bottom field lines of the destination
 * @param field_select read from the bottom field lines of the reference
 * @param h luma block height in lines (16 for frame, 8 per field)
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* luma sub-pel phase (0..15) and integer source position */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* first halving: qpel -> hpel chroma vector, optionally mimicking the
       rounding of known buggy encoders so their streams decode as intended */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* second halving, keeping the half-pel bit (round towards odd) */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the 17x17 interpolation window sticks out of the padded frame,
       rebuild the source in the edge emulation buffer */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* field mode: the 16x8 field block is done as two 8x8 qpel ops */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3205
3206 inline int ff_h263_round_chroma(int x){
3207     if (x >= 0)
3208         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3209     else {
3210         x = -x;
3211         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3212     }
3213 }
3214
/**
 * h263 chroma 4mv motion compensation.
 * The caller sums the four luma MVs into (mx, my); a single chroma vector
 * is derived from that sum with h263's special rounding.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* half-pel phase and integer position */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)    /* clamped to the border: drop the half-pel bit */
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;   /* remember so Cr below gets the same treatment */
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3263
/**
 * h263 chroma 4mv motion compensation, low-resolution decoding variant.
 * Like chroma_4mv_motion() but operates on a frame scaled down by
 * 2^lowres and uses the h264 chroma MC functions for interpolation.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;            /* chroma block size at this scale */
    const int s_mask= (2<<lowres)-1;         /* mask for the sub-pel phase bits */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into sub-pel phase (sx, sy) and integer source position */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;   /* remember so Cr below gets the same treatment */
        }
    }
    /* rescale the phase to the fixed range the chroma MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3313
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* h263 OBMC path: each 8x8 block is predicted from its own MV plus the
       MVs of the four neighbouring blocks (P frames only) */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        /* rows 1-2 hold this MB's 2x2 block MVs; row 3 duplicates row 2,
           presumably because the MB row below is not decoded yet
           (NOTE(review): confirm against the OBMC spec) */
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top neighbour row; fall back to own MVs at the border or intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbour column */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbour column */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* mid, top, left, right, bottom MV for this 8x8 block */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            /* accumulate own MVs for the single chroma vector */
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* chroma accumulates hpel-scale MVs in quarter-sample mode */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* field prediction within a frame picture */
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture: the opposite-parity reference of the second
               field is the current picture itself */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            /* same-parity reference or current picture, see MV_TYPE_FIELD */
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: average predictions from both field parities */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3562
/**
 * motion compensation of a single macroblock, low-resolution decoding variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   /* 8x8 block size at this scale */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                /* accumulate the four luma MVs for the single chroma vector */
                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture: the opposite-parity reference of the second
               field is the current picture itself */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            /* same-parity reference or current picture, see MV_TYPE_FIELD */
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: average predictions from both field parities */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3688
/* Dequantize block[] with the intra unquantizer and write (not add)
   the idct result to dest[]. */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3696
3697 /* add block[] to dest[] */
3698 static inline void add_dct(MpegEncContext *s,
3699                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3700 {
3701     if (s->block_last_index[i] >= 0) {
3702         s->dsp.idct_add (dest, line_size, block);
3703     }
3704 }
3705
3706 static inline void add_dequant_dct(MpegEncContext *s,
3707                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3708 {
3709     if (s->block_last_index[i] >= 0) {
3710         s->dct_unquantize_inter(s, block, i, qscale);
3711
3712         s->dsp.idct_add (dest, line_size, block);
3713     }
3714 }
3715
/**
 * cleans dc, ac, coded_block for the current non intra MB
 * Resets the intra prediction state so a later intra MB does not
 * predict from this (non-intra) one.
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->b8_stride;
    int xy = s->block_index[0];

    /* reset the four luma DC predictors to the neutral value */
    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy     + wrap] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
    if (s->msmpeg4_version>=3) {
        /* msmpeg4 v3+ additionally predicts the coded-block pattern */
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy     + wrap] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma */
    wrap = s->mb_stride;
    xy = s->mb_x + s->mb_y * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));

    /* mark this MB as cleaned so it is not cleaned twice */
    s->mbintra_table[xy]= 0;
}
3748
3749 /* generic function called after a macroblock has been parsed by the
3750    decoder or after it has been encoded by the encoder.
3751
3752    Important variables used:
3753    s->mb_intra : true if intra macroblock
3754    s->mv_dir   : motion vector direction
3755    s->mv_type  : motion vector type
3756    s->mv       : motion vector
3757    s->interlaced_dct : true if interlaced dct used (mpeg2)
3758  */
3759 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3760 {
3761     int mb_x, mb_y;
3762     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3763 #ifdef HAVE_XVMC
3764     if(s->avctx->xvmc_acceleration){
3765         XVMC_decode_mb(s);//xvmc uses pblocks
3766         return;
3767     }
3768 #endif
3769
3770     mb_x = s->mb_x;
3771     mb_y = s->mb_y;
3772
3773     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3774        /* save DCT coefficients */
3775        int i,j;
3776        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3777        for(i=0; i<6; i++)
3778            for(j=0; j<64; j++)
3779                *dct++ = block[i][s->dsp.idct_permutation[j]];
3780     }
3781
3782     s->current_picture.qscale_table[mb_xy]= s->qscale;
3783
3784     /* update DC predictors for P macroblocks */
3785     if (!s->mb_intra) {
3786         if (s->h263_pred || s->h263_aic) {
3787             if(s->mbintra_table[mb_xy])
3788                 ff_clean_intra_table_entries(s);
3789         } else {
3790             s->last_dc[0] =
3791             s->last_dc[1] =
3792             s->last_dc[2] = 128 << s->intra_dc_precision;
3793         }
3794     }
3795     else if (s->h263_pred || s->h263_aic)
3796         s->mbintra_table[mb_xy]=1;
3797
3798     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3799         uint8_t *dest_y, *dest_cb, *dest_cr;
3800         int dct_linesize, dct_offset;
3801         op_pixels_func (*op_pix)[4];
3802         qpel_mc_func (*op_qpix)[16];
3803         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3804         const int uvlinesize= s->current_picture.linesize[1];
3805         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3806         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3807
3808         /* avoid copy if macroblock skipped in last frame too */
3809         /* skip only during decoding as we might trash the buffers during encoding a bit */
3810         if(!s->encoding){
3811             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3812             const int age= s->current_picture.age;
3813
3814             assert(age);
3815
3816             if (s->mb_skipped) {
3817                 s->mb_skipped= 0;
3818                 assert(s->pict_type!=I_TYPE);
3819
3820                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3821                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3822
3823                 /* if previous was skipped too, then nothing to do !  */
3824                 if (*mbskip_ptr >= age && s->current_picture.reference){
3825                     return;
3826                 }
3827             } else if(!s->current_picture.reference){
3828                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3829                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3830             } else{
3831                 *mbskip_ptr = 0; /* not skipped */
3832             }
3833         }
3834
3835         dct_linesize = linesize << s->interlaced_dct;
3836         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3837
3838         if(readable){
3839             dest_y=  s->dest[0];
3840             dest_cb= s->dest[1];
3841             dest_cr= s->dest[2];
3842         }else{
3843             dest_y = s->b_scratchpad;
3844             dest_cb= s->b_scratchpad+16*linesize;
3845             dest_cr= s->b_scratchpad+32*linesize;
3846         }
3847
3848         if (!s->mb_intra) {
3849             /* motion handling */
3850             /* decoding or more than one mb_type (MC was already done otherwise) */
3851             if(!s->encoding){
3852                 if(lowres_flag){
3853                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3854
3855                     if (s->mv_dir & MV_DIR_FORWARD) {
3856                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3857                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3858                     }
3859                     if (s->mv_dir & MV_DIR_BACKWARD) {
3860                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3861                     }
3862                 }else{
3863                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3864                         op_pix = s->dsp.put_pixels_tab;
3865                         op_qpix= s->dsp.put_qpel_pixels_tab;
3866                     }else{
3867                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3868                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3869                     }
3870                     if (s->mv_dir & MV_DIR_FORWARD) {
3871                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3872                         op_pix = s->dsp.avg_pixels_tab;
3873                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3874                     }
3875                     if (s->mv_dir & MV_DIR_BACKWARD) {
3876                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3877                     }
3878                 }
3879             }
3880
3881             /* skip dequant / idct if we are really late ;) */
3882             if(s->hurry_up>1) goto skip_idct;
3883             if(s->avctx->skip_idct){
3884                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3885                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3886                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3887                     goto skip_idct;
3888             }
3889
3890             /* add dct residue */
3891             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3892                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3893                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3894                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3895                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3896                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3897
3898                 if(!(s->flags&CODEC_FLAG_GRAY)){
3899                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3900                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3901                 }
3902             } else if(s->codec_id != CODEC_ID_WMV2){
3903                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3904                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3905                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3906                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3907
3908                 if(!(s->flags&CODEC_FLAG_GRAY)){
3909                     if(s->chroma_y_shift){//Chroma420
3910                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3911                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3912                     }else{
3913                         //chroma422
3914                         dct_linesize = uvlinesize << s->interlaced_dct;
3915                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3916
3917                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3918                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3919                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3920                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3921                         if(!s->chroma_x_shift){//Chroma444
3922                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3923                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3924                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3925                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3926                         }
3927                     }
3928                 }//fi gray
3929             }
3930             else{
3931                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3932             }
3933         } else {
3934             /* dct only in intra block */
3935             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3936                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3937                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3938                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3939                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3940
3941                 if(!(s->flags&CODEC_FLAG_GRAY)){
3942                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3943                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3944                 }
3945             }else{
3946                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3947                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3948                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3949                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3950
3951                 if(!(s->flags&CODEC_FLAG_GRAY)){
3952                     if(s->chroma_y_shift){
3953                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3954                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3955                     }else{
3956
3957                         dct_linesize = uvlinesize << s->interlaced_dct;
3958                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3959
3960                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3961                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3962                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3963                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3964                         if(!s->chroma_x_shift){//Chroma444
3965                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3966                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3967                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3968                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3969                         }
3970                     }
3971                 }//gray
3972             }
3973         }
3974 skip_idct:
3975         if(!readable){
3976             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3977             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3978             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3979         }
3980     }
3981 }
3982
3983 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3984     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3985     else                  MPV_decode_mb_internal(s, block, 0);
3986 }
3987
3988 #ifdef CONFIG_ENCODERS
3989
3990 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3991 {
3992     static const char tab[64]=
3993         {3,2,2,1,1,1,1,1,
3994          1,1,1,1,1,1,1,1,
3995          1,1,1,1,1,1,1,1,
3996          0,0,0,0,0,0,0,0,
3997          0,0,0,0,0,0,0,0,
3998          0,0,0,0,0,0,0,0,
3999          0,0,0,0,0,0,0,0,
4000          0,0,0,0,0,0,0,0};
4001     int score=0;
4002     int run=0;
4003     int i;
4004     DCTELEM *block= s->block[n];
4005     const int last_index= s->block_last_index[n];
4006     int skip_dc;
4007
4008     if(threshold<0){
4009         skip_dc=0;
4010         threshold= -threshold;
4011     }else
4012         skip_dc=1;
4013
4014     /* are all which we could set to zero are allready zero? */
4015     if(last_index<=skip_dc - 1) return;
4016
4017     for(i=0; i<=last_index; i++){
4018         const int j = s->intra_scantable.permutated[i];
4019         const int level = ABS(block[j]);
4020         if(level==1){
4021             if(skip_dc && i==0) continue;
4022             score+= tab[run];
4023             run=0;
4024         }else if(level>1){
4025             return;
4026         }else{
4027             run++;
4028         }
4029     }
4030     if(score >= threshold) return;
4031     for(i=skip_dc; i<=last_index; i++){
4032         const int j = s->intra_scantable.permutated[i];
4033         block[j]=0;
4034     }
4035     if(block[0]) s->block_last_index[n]= 0;
4036     else         s->block_last_index[n]= -1;
4037 }
4038
4039 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4040 {
4041     int i;
4042     const int maxlevel= s->max_qcoeff;
4043     const int minlevel= s->min_qcoeff;
4044     int overflow=0;
4045
4046     if(s->mb_intra){
4047         i=1; //skip clipping of intra dc
4048     }else
4049         i=0;
4050
4051     for(;i<=last_index; i++){
4052         const int j= s->intra_scantable.permutated[i];
4053         int level = block[j];
4054
4055         if     (level>maxlevel){
4056             level=maxlevel;
4057             overflow++;
4058         }else if(level<minlevel){
4059             level=minlevel;
4060             overflow++;
4061         }
4062
4063         block[j]= level;
4064     }
4065
4066     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4067         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4068 }
4069
4070 #endif //CONFIG_ENCODERS
4071
4072 /**
4073  *
4074  * @param h is the normal height, this will be reduced automatically if needed for the last row
4075  */
4076 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4077     if (s->avctx->draw_horiz_band) {
4078         AVFrame *src;
4079         int offset[4];
4080
4081         if(s->picture_structure != PICT_FRAME){
4082             h <<= 1;
4083             y <<= 1;
4084             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4085         }
4086
4087         h= FFMIN(h, s->avctx->height - y);
4088
4089         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4090             src= (AVFrame*)s->current_picture_ptr;
4091         else if(s->last_picture_ptr)
4092             src= (AVFrame*)s->last_picture_ptr;
4093         else
4094             return;
4095
4096         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4097             offset[0]=
4098             offset[1]=
4099             offset[2]=
4100             offset[3]= 0;
4101         }else{
4102             offset[0]= y * s->linesize;;
4103             offset[1]=
4104             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4105             offset[3]= 0;
4106         }
4107
4108         emms_c();
4109
4110         s->avctx->draw_horiz_band(s->avctx, src, offset,
4111                                   y, s->picture_structure, h);
4112     }
4113 }
4114
4115 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4116     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4117     const int uvlinesize= s->current_picture.linesize[1];
4118     const int mb_size= 4 - s->avctx->lowres;
4119
4120     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4121     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4122     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4123     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4124     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4125     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4126     //block_index is not used by mpeg2, so it is not affected by chroma_format
4127
4128     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4129     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4130     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4131
4132     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4133     {
4134         s->dest[0] += s->mb_y *   linesize << mb_size;
4135         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4136         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4137     }
4138 }
4139
4140 #ifdef CONFIG_ENCODERS
4141
/**
 * Compute a perceptual weight for every pixel of an 8x8 block: roughly
 * 36 times the standard deviation of the 3x3 neighbourhood (clipped at
 * the block border), so flat areas get small weights and busy areas
 * large ones.
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
//FIXME optimize
    int x, y;

    for(y=0; y<8; y++){
        for(x=0; x<8; x++){
            const int y0= FFMAX(y-1, 0);
            const int y1= FFMIN(8, y+2);
            const int x0= FFMAX(x-1, 0);
            const int x1= FFMIN(8, x+2);
            int sum=0, sqr=0, count=0;
            int x2, y2;

            /* first and second moments over the clipped neighbourhood */
            for(y2= y0; y2 < y1; y2++){
                for(x2= x0; x2 < x1; x2++){
                    const int v= ptr[x2 + y2*stride];
                    sum += v;
                    sqr += v*v;
                    count++;
                }
            }
            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4164
/**
 * Encode one 16x16 macroblock of the current picture.
 *
 * Performs, in order: adaptive quantizer update, source pixel fetch
 * (with edge emulation at picture borders), intra pixel copy or inter
 * motion compensation plus pixel differencing, forward DCT +
 * quantization with optional noise shaping and coefficient elimination,
 * and finally entropy coding with the codec specific macroblock coder.
 *
 * @param motion_x x component of the motion vector to be coded
 * @param motion_y y component of the motion vector to be coded
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];   // perceptual weights for noise shaping (one 8x8 block each)
    DCTELEM orig[6][64];     // pre-quantization copy of the blocks for dct_quantize_refine()
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];         // per-block flag: 1 -> block treated as all-zero, DCT skipped
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    /* per-macroblock quantizer update (adaptive quantization / QP RD) */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            // force an even dquant in B frames
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            // direct mode MBs carry no quantizer change
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        // 8x8 MC MB types cannot signal a dquant either
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixel pointers into the picture to be encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* macroblock sticks out of the picture -> read from an edge-emulated copy */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* decide between frame (progressive) and field (interlaced) DCT;
           the -400 bias favours the progressive transform */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    // field DCT: blocks 2/3 start one line down, lines are 2 apart
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* intra: copy the source pixels straight into the DCT blocks */
        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        /* motion compensate the prediction into s->dest; forward first,
           backward averaged on top for bidirectional MBs */
        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* frame vs field DCT decision on the prediction error */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* inter: DCT blocks hold source minus prediction */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* cheap skip heuristic: in low-variance MBs, blocks whose SAD is
           below ~20*qscale would quantize to zero anyway */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs per-pixel weights and an unquantized copy of the blocks */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop blocks whose few tiny coefficients are not worth their bits */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* grayscale intra: emit neutral chroma DC (value 1024 before scaling) */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4426
4427 #endif //CONFIG_ENCODERS
4428
4429 void ff_mpeg_flush(AVCodecContext *avctx){
4430     int i;
4431     MpegEncContext *s = avctx->priv_data;
4432
4433     if(s==NULL || s->picture==NULL)
4434         return;
4435
4436     for(i=0; i<MAX_PICTURE_COUNT; i++){
4437        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4438                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4439         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4440     }
4441     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4442
4443     s->mb_x= s->mb_y= 0;
4444
4445     s->parse_context.state= -1;
4446     s->parse_context.frame_start_found= 0;
4447     s->parse_context.overread= 0;
4448     s->parse_context.overread_index= 0;
4449     s->parse_context.index= 0;
4450     s->parse_context.last_index= 0;
4451     s->bitstream_buffer_size=0;
4452 }
4453
4454 #ifdef CONFIG_ENCODERS
4455 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4456 {
4457     const uint16_t *srcw= (uint16_t*)src;
4458     int words= length>>4;
4459     int bits= length&15;
4460     int i;
4461
4462     if(length==0) return;
4463
4464     if(words < 16){
4465         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4466     }else if(put_bits_count(pb)&7){
4467         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4468     }else{
4469         for(i=0; put_bits_count(pb)&31; i++)
4470             put_bits(pb, 8, src[i]);
4471         flush_put_bits(pb);
4472         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4473         skip_put_bytes(pb, 2*words-i);
4474     }
4475
4476     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4477 }
4478
4479 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4480     int i;
4481
4482     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4483
4484     /* mpeg1 */
4485     d->mb_skip_run= s->mb_skip_run;
4486     for(i=0; i<3; i++)
4487         d->last_dc[i]= s->last_dc[i];
4488
4489     /* statistics */
4490     d->mv_bits= s->mv_bits;
4491     d->i_tex_bits= s->i_tex_bits;
4492     d->p_tex_bits= s->p_tex_bits;
4493     d->i_count= s->i_count;
4494     d->f_count= s->f_count;
4495     d->b_count= s->b_count;
4496     d->skip_count= s->skip_count;
4497     d->misc_bits= s->misc_bits;
4498     d->last_bits= 0;
4499
4500     d->mb_skipped= 0;
4501     d->qscale= s->qscale;
4502     d->dquant= s->dquant;
4503 }
4504
4505 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4506     int i;
4507
4508     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4509     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4510
4511     /* mpeg1 */
4512     d->mb_skip_run= s->mb_skip_run;
4513     for(i=0; i<3; i++)
4514         d->last_dc[i]= s->last_dc[i];
4515
4516     /* statistics */
4517     d->mv_bits= s->mv_bits;
4518     d->i_tex_bits= s->i_tex_bits;
4519     d->p_tex_bits= s->p_tex_bits;
4520     d->i_count= s->i_count;
4521     d->f_count= s->f_count;
4522     d->b_count= s->b_count;
4523     d->skip_count= s->skip_count;
4524     d->misc_bits= s->misc_bits;
4525
4526     d->mb_intra= s->mb_intra;
4527     d->mb_skipped= s->mb_skipped;
4528     d->mv_type= s->mv_type;
4529     d->mv_dir= s->mv_dir;
4530     d->pb= s->pb;
4531     if(s->data_partitioning){
4532         d->pb2= s->pb2;
4533         d->tex_pb= s->tex_pb;
4534     }
4535     d->block= s->block;
4536     for(i=0; i<6; i++)
4537         d->block_last_index[i]= s->block_last_index[i];
4538     d->interlaced_dct= s->interlaced_dct;
4539     d->qscale= s->qscale;
4540 }
4541
/**
 * Trial-encode one macroblock candidate (mode given by 'type') and keep it
 * if its cost beats the best seen so far.
 *
 * The MB is encoded into the scratch bitstream set selected by *next_block;
 * the cost is the bit count, or an RD cost (bits*lambda2 + SSE) when
 * mb_decision==FF_MB_DECISION_RD. On improvement, *dmin is lowered, the
 * scratch set is flipped, and the winning context is saved into 'best'.
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore the pre-encode state saved by the caller before each trial */
    copy_context_before_encode(s, backup, type);

    /* route block coefficients and bitstream output into the scratch set */
    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    /* the second scratch set also redirects reconstruction into
       rd_scratchpad so the current best reconstruction in s->dest survives */
    if(*next_block){
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* base cost: bits produced across all active bitstreams */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    /* full RD: decode the MB back and add the distortion term */
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    /* new best: remember the cost, flip the scratch set (so the loser gets
       overwritten next trial) and snapshot the encoder state */
    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4592
4593 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4594     uint32_t *sq = squareTbl + 256;
4595     int acc=0;
4596     int x,y;
4597
4598     if(w==16 && h==16)
4599         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4600     else if(w==8 && h==8)
4601         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4602
4603     for(y=0; y<h; y++){
4604         for(x=0; x<w; x++){
4605             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4606         }
4607     }
4608
4609     assert(acc>=0);
4610
4611     return acc;
4612 }
4613
4614 static int sse_mb(MpegEncContext *s){
4615     int w= 16;
4616     int h= 16;
4617
4618     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4619     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4620
4621     if(w==16 && h==16)
4622       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4623         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4624                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4625                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4626       }else{
4627         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4628                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4629                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4630       }
4631     else
4632         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4633                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4634                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4635 }
4636
4637 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4638     MpegEncContext *s= arg;
4639
4640
4641     s->me.pre_pass=1;
4642     s->me.dia_size= s->avctx->pre_dia_size;
4643     s->first_slice_line=1;
4644     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4645         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4646             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4647         }
4648         s->first_slice_line=0;
4649     }
4650
4651     s->me.pre_pass=0;
4652
4653     return 0;
4654 }
4655
4656 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4657     MpegEncContext *s= arg;
4658
4659     s->me.dia_size= s->avctx->dia_size;
4660     s->first_slice_line=1;
4661     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4662         s->mb_x=0; //for block init below
4663         ff_init_block_index(s);
4664         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4665             s->block_index[0]+=2;
4666             s->block_index[1]+=2;
4667             s->block_index[2]+=2;
4668             s->block_index[3]+=2;
4669
4670             /* compute motion vector & mb_type and store in context */
4671             if(s->pict_type==B_TYPE)
4672                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4673             else
4674                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4675         }
4676         s->first_slice_line=0;
4677     }
4678     return 0;
4679 }
4680
4681 static int mb_var_thread(AVCodecContext *c, void *arg){
4682     MpegEncContext *s= arg;
4683     int mb_x, mb_y;
4684
4685     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4686         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4687             int xx = mb_x * 16;
4688             int yy = mb_y * 16;
4689             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4690             int varc;
4691             int sum = s->dsp.pix_sum(pix, s->linesize);
4692
4693             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4694
4695             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4696             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4697             s->me.mb_var_sum_temp    += varc;
4698         }
4699     }
4700     return 0;
4701 }
4702
4703 static void write_slice_end(MpegEncContext *s){
4704     if(s->codec_id==CODEC_ID_MPEG4){
4705         if(s->partitioned_frame){
4706             ff_mpeg4_merge_partitions(s);
4707         }
4708
4709         ff_mpeg4_stuffing(&s->pb);
4710     }else if(s->out_format == FMT_MJPEG){
4711         ff_mjpeg_stuffing(&s->pb);
4712     }
4713
4714     align_put_bits(&s->pb);
4715     flush_put_bits(&s->pb);
4716
4717     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4718         s->misc_bits+= get_bits_diff(s);
4719 }
4720
4721 static int encode_thread(AVCodecContext *c, void *arg){
4722     MpegEncContext *s= arg;
4723     int mb_x, mb_y, pdif = 0;
4724     int i, j;
4725     MpegEncContext best_s, backup_s;
4726     uint8_t bit_buf[2][MAX_MB_BYTES];
4727     uint8_t bit_buf2[2][MAX_MB_BYTES];
4728     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4729     PutBitContext pb[2], pb2[2], tex_pb[2];
4730 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4731
4732     for(i=0; i<2; i++){
4733         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4734         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4735         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4736     }
4737
4738     s->last_bits= put_bits_count(&s->pb);
4739     s->mv_bits=0;
4740     s->misc_bits=0;
4741     s->i_tex_bits=0;
4742     s->p_tex_bits=0;
4743     s->i_count=0;
4744     s->f_count=0;
4745     s->b_count=0;
4746     s->skip_count=0;
4747
4748     for(i=0; i<3; i++){
4749         /* init last dc values */
4750         /* note: quant matrix value (8) is implied here */
4751         s->last_dc[i] = 128 << s->intra_dc_precision;
4752
4753         s->current_picture.error[i] = 0;
4754     }
4755     s->mb_skip_run = 0;
4756     memset(s->last_mv, 0, sizeof(s->last_mv));
4757
4758     s->last_mv_dir = 0;
4759
4760     switch(s->codec_id){
4761     case CODEC_ID_H263:
4762     case CODEC_ID_H263P:
4763     case CODEC_ID_FLV1:
4764         s->gob_index = ff_h263_get_gob_height(s);
4765         break;
4766     case CODEC_ID_MPEG4:
4767         if(s->partitioned_frame)
4768             ff_mpeg4_init_partitions(s);
4769         break;
4770     }
4771
4772     s->resync_mb_x=0;
4773     s->resync_mb_y=0;
4774     s->first_slice_line = 1;
4775     s->ptr_lastgob = s->pb.buf;
4776     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4777 //    printf("row %d at %X\n", s->mb_y, (int)s);
4778         s->mb_x=0;
4779         s->mb_y= mb_y;
4780
4781         ff_set_qscale(s, s->qscale);
4782         ff_init_block_index(s);
4783
4784         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4785             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4786             int mb_type= s->mb_type[xy];
4787 //            int d;
4788             int dmin= INT_MAX;
4789             int dir;
4790
4791             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4792                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4793                 return -1;
4794             }
4795             if(s->data_partitioning){
4796                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4797                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4798                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4799                     return -1;
4800                 }
4801             }
4802
4803             s->mb_x = mb_x;
4804             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4805             ff_update_block_index(s);
4806
4807 #ifdef CONFIG_H261_ENCODER
4808             if(s->codec_id == CODEC_ID_H261){
4809                 ff_h261_reorder_mb_index(s);
4810                 xy= s->mb_y*s->mb_stride + s->mb_x;
4811                 mb_type= s->mb_type[xy];
4812             }
4813 #endif
4814
4815             /* write gob / video packet header  */
4816             if(s->rtp_mode){
4817                 int current_packet_size, is_gob_start;
4818
4819                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4820
4821                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4822
4823                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4824
4825                 switch(s->codec_id){
4826                 case CODEC_ID_H263:
4827                 case CODEC_ID_H263P:
4828                     if(!s->h263_slice_structured)
4829                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4830                     break;
4831                 case CODEC_ID_MPEG2VIDEO:
4832                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4833                 case CODEC_ID_MPEG1VIDEO:
4834                     if(s->mb_skip_run) is_gob_start=0;
4835                     break;
4836                 }
4837
4838                 if(is_gob_start){
4839                     if(s->start_mb_y != mb_y || mb_x!=0){
4840                         write_slice_end(s);
4841
4842                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4843                             ff_mpeg4_init_partitions(s);
4844                         }
4845                     }
4846
4847                     assert((put_bits_count(&s->pb)&7) == 0);
4848                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4849
4850                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4851                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4852                         int d= 100 / s->avctx->error_rate;
4853                         if(r % d == 0){
4854                             current_packet_size=0;
4855 #ifndef ALT_BITSTREAM_WRITER
4856                             s->pb.buf_ptr= s->ptr_lastgob;
4857 #endif
4858                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4859                         }
4860                     }
4861
4862                     if (s->avctx->rtp_callback){
4863                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4864                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4865                     }
4866
4867                     switch(s->codec_id){
4868                     case CODEC_ID_MPEG4:
4869                         ff_mpeg4_encode_video_packet_header(s);
4870                         ff_mpeg4_clean_buffers(s);
4871                     break;
4872                     case CODEC_ID_MPEG1VIDEO:
4873                     case CODEC_ID_MPEG2VIDEO:
4874                         ff_mpeg1_encode_slice_header(s);
4875                         ff_mpeg1_clean_buffers(s);
4876                     break;
4877                     case CODEC_ID_H263:
4878                     case CODEC_ID_H263P:
4879                         h263_encode_gob_header(s, mb_y);
4880                     break;
4881                     }
4882
4883                     if(s->flags&CODEC_FLAG_PASS1){
4884                         int bits= put_bits_count(&s->pb);
4885                         s->misc_bits+= bits - s->last_bits;
4886                         s->last_bits= bits;
4887                     }
4888
4889                     s->ptr_lastgob += current_packet_size;
4890                     s->first_slice_line=1;
4891                     s->resync_mb_x=mb_x;
4892                     s->resync_mb_y=mb_y;
4893                 }
4894             }
4895
4896             if(  (s->resync_mb_x   == s->mb_x)
4897                && s->resync_mb_y+1 == s->mb_y){
4898                 s->first_slice_line=0;
4899             }
4900
4901             s->mb_skipped=0;
4902             s->dquant=0; //only for QP_RD
4903
4904             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4905                 int next_block=0;
4906                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4907
4908                 copy_context_before_encode(&backup_s, s, -1);
4909                 backup_s.pb= s->pb;
4910                 best_s.data_partitioning= s->data_partitioning;
4911                 best_s.partitioned_frame= s->partitioned_frame;
4912                 if(s->data_partitioning){
4913                     backup_s.pb2= s->pb2;
4914                     backup_s.tex_pb= s->tex_pb;
4915                 }
4916
4917                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4918                     s->mv_dir = MV_DIR_FORWARD;
4919                     s->mv_type = MV_TYPE_16X16;
4920                     s->mb_intra= 0;
4921                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4922                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4923                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4924                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4925                 }
4926                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4927                     s->mv_dir = MV_DIR_FORWARD;
4928                     s->mv_type = MV_TYPE_FIELD;
4929                     s->mb_intra= 0;
4930                     for(i=0; i<2; i++){
4931                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4932                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4933                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4934                     }
4935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4936                                  &dmin, &next_block, 0, 0);
4937                 }
4938                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4939                     s->mv_dir = MV_DIR_FORWARD;
4940                     s->mv_type = MV_TYPE_16X16;
4941                     s->mb_intra= 0;
4942                     s->mv[0][0][0] = 0;
4943                     s->mv[0][0][1] = 0;
4944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4945                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4946                 }
4947                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4948                     s->mv_dir = MV_DIR_FORWARD;
4949                     s->mv_type = MV_TYPE_8X8;
4950                     s->mb_intra= 0;
4951                     for(i=0; i<4; i++){
4952                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4953                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4954                     }
4955                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4956                                  &dmin, &next_block, 0, 0);
4957                 }
4958                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4959                     s->mv_dir = MV_DIR_FORWARD;
4960                     s->mv_type = MV_TYPE_16X16;
4961                     s->mb_intra= 0;
4962                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4963                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4964                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4965                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4966                 }
4967                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4968                     s->mv_dir = MV_DIR_BACKWARD;
4969                     s->mv_type = MV_TYPE_16X16;
4970                     s->mb_intra= 0;
4971                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4972                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4973                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4974                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4975                 }
4976                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4977                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4978                     s->mv_type = MV_TYPE_16X16;
4979                     s->mb_intra= 0;
4980                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4981                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4982                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4983                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4984                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
4985                                  &dmin, &next_block, 0, 0);
4986                 }
4987                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4988                     int mx= s->b_direct_mv_table[xy][0];
4989                     int my= s->b_direct_mv_table[xy][1];
4990
4991                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4992                     s->mb_intra= 0;
4993                     ff_mpeg4_set_direct_mv(s, mx, my);
4994                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
4995                                  &dmin, &next_block, mx, my);
4996                 }
4997                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
4998                     s->mv_dir = MV_DIR_FORWARD;
4999                     s->mv_type = MV_TYPE_FIELD;
5000                     s->mb_intra= 0;
5001                     for(i=0; i<2; i++){
5002                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5003                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5004                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5005                     }
5006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5007                                  &dmin, &next_block, 0, 0);
5008                 }
5009                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5010                     s->mv_dir = MV_DIR_BACKWARD;
5011                     s->mv_type = MV_TYPE_FIELD;
5012                     s->mb_intra= 0;
5013                     for(i=0; i<2; i++){
5014                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5015                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5016                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5017                     }
5018                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5019                                  &dmin, &next_block, 0, 0);
5020                 }
5021                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5022                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5023                     s->mv_type = MV_TYPE_FIELD;
5024                     s->mb_intra= 0;
5025                     for(dir=0; dir<2; dir++){
5026                         for(i=0; i<2; i++){
5027                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5028                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5029                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5030                         }
5031                     }
5032                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5033                                  &dmin, &next_block, 0, 0);
5034                 }
5035                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5036                     s->mv_dir = 0;
5037                     s->mv_type = MV_TYPE_16X16;
5038                     s->mb_intra= 1;
5039                     s->mv[0][0][0] = 0;
5040                     s->mv[0][0][1] = 0;
5041                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5042                                  &dmin, &next_block, 0, 0);
5043                     if(s->h263_pred || s->h263_aic){
5044                         if(best_s.mb_intra)
5045                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5046                         else
5047                             ff_clean_intra_table_entries(s); //old mode?
5048                     }
5049                 }
5050
5051                 if(s->flags & CODEC_FLAG_QP_RD){
5052                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5053                         const int last_qp= backup_s.qscale;
5054                         int dquant, dir, qp, dc[6];
5055                         DCTELEM ac[6][16];
5056                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5057
5058                         assert(backup_s.dquant == 0);
5059
5060                         //FIXME intra
5061                         s->mv_dir= best_s.mv_dir;
5062                         s->mv_type = MV_TYPE_16X16;
5063                         s->mb_intra= best_s.mb_intra;
5064                         s->mv[0][0][0] = best_s.mv[0][0][0];
5065                         s->mv[0][0][1] = best_s.mv[0][0][1];
5066                         s->mv[1][0][0] = best_s.mv[1][0][0];
5067                         s->mv[1][0][1] = best_s.mv[1][0][1];
5068
5069                         dir= s->pict_type == B_TYPE ? 2 : 1;
5070                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5071                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5072                             qp= last_qp + dquant;
5073                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5074                                 break;
5075                             backup_s.dquant= dquant;
5076                             if(s->mb_intra && s->dc_val[0]){
5077                                 for(i=0; i<6; i++){
5078                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5079                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5080                                 }
5081                             }
5082
5083                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5084                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5085                             if(best_s.qscale != qp){
5086                                 if(s->mb_intra && s->dc_val[0]){
5087                                     for(i=0; i<6; i++){
5088                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5089                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5090                                     }
5091                                 }
5092                                 if(dir > 0 && dquant==dir){
5093                                     dquant= 0;
5094                                     dir= -dir;
5095                                 }else
5096                                     break;
5097                             }
5098                         }
5099                         qp= best_s.qscale;
5100                         s->current_picture.qscale_table[xy]= qp;
5101                     }
5102                 }
5103
5104                 copy_context_after_encode(s, &best_s, -1);
5105
5106                 pb_bits_count= put_bits_count(&s->pb);
5107                 flush_put_bits(&s->pb);
5108                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5109                 s->pb= backup_s.pb;
5110
5111                 if(s->data_partitioning){
5112                     pb2_bits_count= put_bits_count(&s->pb2);
5113                     flush_put_bits(&s->pb2);
5114                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5115                     s->pb2= backup_s.pb2;
5116
5117                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5118                     flush_put_bits(&s->tex_pb);
5119                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5120                     s->tex_pb= backup_s.tex_pb;
5121                 }
5122                 s->last_bits= put_bits_count(&s->pb);
5123
5124                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5125                     ff_h263_update_motion_val(s);
5126
5127                 if(next_block==0){ //FIXME 16 vs linesize16
5128                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5129                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5130                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5131                 }
5132
5133                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5134                     MPV_decode_mb(s, s->block);
5135             } else {
5136                 int motion_x, motion_y;
5137                 s->mv_type=MV_TYPE_16X16;
5138                 // only one MB-Type possible
5139
5140                 switch(mb_type){
5141                 case CANDIDATE_MB_TYPE_INTRA:
5142                     s->mv_dir = 0;
5143                     s->mb_intra= 1;
5144                     motion_x= s->mv[0][0][0] = 0;
5145                     motion_y= s->mv[0][0][1] = 0;
5146                     break;
5147                 case CANDIDATE_MB_TYPE_INTER:
5148                     s->mv_dir = MV_DIR_FORWARD;
5149                     s->mb_intra= 0;
5150                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5151                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5152                     break;
5153                 case CANDIDATE_MB_TYPE_INTER_I:
5154                     s->mv_dir = MV_DIR_FORWARD;
5155                     s->mv_type = MV_TYPE_FIELD;
5156                     s->mb_intra= 0;
5157                     for(i=0; i<2; i++){
5158                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5159                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5160                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5161                     }
5162                     motion_x = motion_y = 0;
5163                     break;
5164                 case CANDIDATE_MB_TYPE_INTER4V:
5165                     s->mv_dir = MV_DIR_FORWARD;
5166                     s->mv_type = MV_TYPE_8X8;
5167                     s->mb_intra= 0;
5168                     for(i=0; i<4; i++){
5169                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5170                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5171                     }
5172                     motion_x= motion_y= 0;
5173                     break;
5174                 case CANDIDATE_MB_TYPE_DIRECT:
5175                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5176                     s->mb_intra= 0;
5177                     motion_x=s->b_direct_mv_table[xy][0];
5178                     motion_y=s->b_direct_mv_table[xy][1];
5179                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5180                     break;
5181                 case CANDIDATE_MB_TYPE_BIDIR:
5182                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5183                     s->mb_intra= 0;
5184                     motion_x=0;
5185                     motion_y=0;
5186                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5187                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5188                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5189                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5190                     break;
5191                 case CANDIDATE_MB_TYPE_BACKWARD:
5192                     s->mv_dir = MV_DIR_BACKWARD;
5193                     s->mb_intra= 0;
5194                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5195                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5196                     break;
5197                 case CANDIDATE_MB_TYPE_FORWARD:
5198                     s->mv_dir = MV_DIR_FORWARD;
5199                     s->mb_intra= 0;
5200                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5201                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5202 //                    printf(" %d %d ", motion_x, motion_y);
5203                     break;
5204                 case CANDIDATE_MB_TYPE_FORWARD_I:
5205                     s->mv_dir = MV_DIR_FORWARD;
5206                     s->mv_type = MV_TYPE_FIELD;
5207                     s->mb_intra= 0;
5208                     for(i=0; i<2; i++){
5209                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5210                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5211                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5212                     }
5213                     motion_x=motion_y=0;
5214                     break;
5215                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5216                     s->mv_dir = MV_DIR_BACKWARD;
5217                     s->mv_type = MV_TYPE_FIELD;
5218                     s->mb_intra= 0;
5219                     for(i=0; i<2; i++){
5220                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5221                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5222                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5223                     }
5224                     motion_x=motion_y=0;
5225                     break;
5226                 case CANDIDATE_MB_TYPE_BIDIR_I:
5227                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5228                     s->mv_type = MV_TYPE_FIELD;
5229                     s->mb_intra= 0;
5230                     for(dir=0; dir<2; dir++){
5231                         for(i=0; i<2; i++){
5232                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5233                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5234                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5235                         }
5236                     }
5237                     motion_x=motion_y=0;
5238                     break;
5239                 default:
5240                     motion_x=motion_y=0; //gcc warning fix
5241                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5242                 }
5243
5244                 encode_mb(s, motion_x, motion_y);
5245
5246                 // RAL: Update last macroblock type
5247                 s->last_mv_dir = s->mv_dir;
5248
5249                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5250                     ff_h263_update_motion_val(s);
5251
5252                 MPV_decode_mb(s, s->block);
5253             }
5254
5255             /* clean the MV table in IPS frames for direct mode in B frames */
5256             if(s->mb_intra /* && I,P,S_TYPE */){
5257                 s->p_mv_table[xy][0]=0;
5258                 s->p_mv_table[xy][1]=0;
5259             }
5260
5261             if(s->flags&CODEC_FLAG_PSNR){
5262                 int w= 16;
5263                 int h= 16;
5264
5265                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5266                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5267
5268                 s->current_picture.error[0] += sse(
5269                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5270                     s->dest[0], w, h, s->linesize);
5271                 s->current_picture.error[1] += sse(
5272                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5273                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5274                 s->current_picture.error[2] += sse(
5275                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5276                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5277             }
5278             if(s->loop_filter){
5279                 if(s->out_format == FMT_H263)
5280                     ff_h263_loop_filter(s);
5281             }
5282 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5283         }
5284     }
5285
5286     //not beautiful here but we must write it before flushing so it has to be here
5287     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5288         msmpeg4_encode_ext_header(s);
5289
5290     write_slice_end(s);
5291
5292     /* Send the last GOB if RTP */
5293     if (s->avctx->rtp_callback) {
5294         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5295         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5296         /* Call the RTP callback to send the last GOB */
5297         emms_c();
5298         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5299     }
5300
5301     return 0;
5302 }
5303
5304 #define MERGE(field) dst->field += src->field; src->field=0
5305 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5306     MERGE(me.scene_change_score);
5307     MERGE(me.mc_mb_var_sum_temp);
5308     MERGE(me.mb_var_sum_temp);
5309 }
5310
5311 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5312     int i;
5313
5314     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5315     MERGE(dct_count[1]);
5316     MERGE(mv_bits);
5317     MERGE(i_tex_bits);
5318     MERGE(p_tex_bits);
5319     MERGE(i_count);
5320     MERGE(f_count);
5321     MERGE(b_count);
5322     MERGE(skip_count);
5323     MERGE(misc_bits);
5324     MERGE(error_count);
5325     MERGE(padding_bug_score);
5326     MERGE(current_picture.error[0]);
5327     MERGE(current_picture.error[1]);
5328     MERGE(current_picture.error[2]);
5329
5330     if(dst->avctx->noise_reduction){
5331         for(i=0; i<64; i++){
5332             MERGE(dct_error_sum[0][i]);
5333             MERGE(dct_error_sum[1][i]);
5334         }
5335     }
5336
5337     assert(put_bits_count(&src->pb) % 8 ==0);
5338     assert(put_bits_count(&dst->pb) % 8 ==0);
5339     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5340     flush_put_bits(&dst->pb);
5341 }
5342
5343 static void estimate_qp(MpegEncContext *s, int dry_run){
5344     if (!s->fixed_qscale)
5345         s->current_picture_ptr->quality=
5346         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5347
5348     if(s->adaptive_quant){
5349         switch(s->codec_id){
5350         case CODEC_ID_MPEG4:
5351             ff_clean_mpeg4_qscales(s);
5352             break;
5353         case CODEC_ID_H263:
5354         case CODEC_ID_H263P:
5355         case CODEC_ID_FLV1:
5356             ff_clean_h263_qscales(s);
5357             break;
5358         }
5359
5360         s->lambda= s->lambda_table[0];
5361         //FIXME broken
5362     }else
5363         s->lambda= s->current_picture.quality;
5364 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5365     update_qscale(s);
5366 }
5367
/**
 * Encode one picture.
 * Runs motion estimation (threaded), may promote the picture to an
 * I-frame on scene change, chooses f_code/b_code and the quantizer,
 * writes the codec-specific picture header and finally runs the
 * (possibly threaded) macroblock encoding pass.
 * @param s              encoder context
 * @param picture_number display/coded picture number used in the headers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* flip-flop rounding avoids error accumulation over chains of
       P-frames; always reset on I-frames */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    if(s->flags & CODEC_FLAG_PASS2){
        estimate_qp(s, 1);
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        /* one-pass rate control: seed lambda for motion estimation from the
           last qscale of a comparable picture type; the final qscale is
           chosen later by estimate_qp(s, 0) */
        RateControlContext *rcc= &s->rc_context;

        if(s->pict_type==B_TYPE)
            s->lambda= rcc->last_qscale_for[s->pict_type];
        else
            s->lambda= rcc->last_qscale_for[rcc->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        /* me_penalty_compensation is applied as a fixed-point (<<8) factor */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* promote a P-frame to an I-frame if ME reported a scene change */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the estimated MVs and clamp MVs that do
       not fit the chosen range (not needed for H.263+ UMV) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    estimate_qp(s, 0);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the codec-specific picture header and account its bits */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* run the MB encoding loop on all slice threads, then merge the
       per-thread statistics and bitstreams back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5582
5583 #endif //CONFIG_ENCODERS
5584
5585 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5586     const int intra= s->mb_intra;
5587     int i;
5588
5589     s->dct_count[intra]++;
5590
5591     for(i=0; i<64; i++){
5592         int level= block[i];
5593
5594         if(level){
5595             if(level>0){
5596                 s->dct_error_sum[intra][i] += level;
5597                 level -= s->dct_offset[intra][i];
5598                 if(level<0) level=0;
5599             }else{
5600                 s->dct_error_sum[intra][i] -= level;
5601                 level += s->dct_offset[intra][i];
5602                 if(level>0) level=0;
5603             }
5604             block[i]= level;
5605         }
5606     }
5607 }
5608
5609 #ifdef CONFIG_ENCODERS
5610
/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 * Performs the forward DCT, then searches over run/level decisions with a
 * Viterbi-style dynamic program minimizing distortion + lambda*bits,
 * using the codec's AC VLC length tables for the rate term.
 * @param s        encoder context
 * @param block    the 8x8 coefficient block; transformed and quantized in place
 * @param n        block index within the macroblock (<4: luma, else chroma)
 * @param qscale   quantizer scale
 * @param overflow set to nonzero if a coefficient exceeded max_qcoeff
 * @return index of the last nonzero coefficient in scan order, or -1/0
 *         per codec convention if the block quantized to all zeros
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    /* Viterbi state: best run/level/score for a path ending at position i */
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];
    int survivor[65];        // positions still worth extending a path from
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    /* candidate quantized magnitudes (level and level-1) per position */
    int coeff[2][64];
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    /* H.263-style dequant factors: coeff*qmul +/- qadd */
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    /* |level| < threshold1 quantizes to zero; the unsigned compare below
       tests both signs at once */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last coefficient that does not quantize to zero */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate levels for each position up to last_non_zero */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            /* rounds to zero: the only candidate is +/-1 with the DCT sign */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* dynamic program over scan positions: for each position and candidate
       level, try extending every surviving path with the implied run */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* reconstruct the coefficient exactly as the decoder would,
               to measure true distortion */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits in the VLC table (biased index range 0..127) */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    /* also consider ending the block here ("last" VLC) */
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level needs the escape code; its length is run-independent */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        /* prune dominated survivors from the top of the stack */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    if(s->out_format != FMT_H263){
        /* no "last" VLC: pick the best truncation point explicitly */
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    if(last_non_zero == 0 && start_i == 0){
        /* inter block with a single surviving coefficient at position 0:
           redo the RD decision against dropping it entirely */
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack the winning path, writing levels into the block in
       IDCT-permuted order */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5898
5899 //#define REFINE_STATS 1
5900 static int16_t basis[64][64];
5901
5902 static void build_basis(uint8_t *perm){
5903     int i, j, x, y;
5904     emms_c();
5905     for(i=0; i<8; i++){
5906         for(j=0; j<8; j++){
5907             for(y=0; y<8; y++){
5908                 for(x=0; x<8; x++){
5909                     double s= 0.25*(1<<BASIS_SHIFT);
5910                     int index= 8*i + j;
5911                     int perm_index= perm[index];
5912                     if(i==0) s*= sqrt(0.5);
5913                     if(j==0) s*= sqrt(0.5);
5914                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5915                 }
5916             }
5917         }
5918     }
5919 }
5920
5921 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5922                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5923                         int n, int qscale){
5924     int16_t rem[64];
5925     DCTELEM d1[64] __align16;
5926     const int *qmat;
5927     const uint8_t *scantable= s->intra_scantable.scantable;
5928     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5929 //    unsigned int threshold1, threshold2;
5930 //    int bias=0;
5931     int run_tab[65];
5932     int prev_run=0;
5933     int prev_level=0;
5934     int qmul, qadd, start_i, last_non_zero, i, dc;
5935     uint8_t * length;
5936     uint8_t * last_length;
5937     int lambda;
5938     int rle_index, run, q, sum;
5939 #ifdef REFINE_STATS
5940 static int count=0;
5941 static int after_last=0;
5942 static int to_zero=0;
5943 static int from_zero=0;
5944 static int raise=0;
5945 static int lower=0;
5946 static int messed_sign=0;
5947 #endif
5948
5949     if(basis[0][0] == 0)
5950         build_basis(s->dsp.idct_permutation);
5951
5952     qmul= qscale*2;
5953     qadd= (qscale-1)|1;
5954     if (s->mb_intra) {
5955         if (!s->h263_aic) {
5956             if (n < 4)
5957                 q = s->y_dc_scale;
5958             else
5959                 q = s->c_dc_scale;
5960         } else{
5961             /* For AIC we skip quant/dequant of INTRADC */
5962             q = 1;
5963             qadd=0;
5964         }
5965         q <<= RECON_SHIFT-3;
5966         /* note: block[0] is assumed to be positive */
5967         dc= block[0]*q;
5968 //        block[0] = (block[0] + (q >> 1)) / q;
5969         start_i = 1;
5970         qmat = s->q_intra_matrix[qscale];
5971 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5972 //            bias= 1<<(QMAT_SHIFT-1);
5973         length     = s->intra_ac_vlc_length;
5974         last_length= s->intra_ac_vlc_last_length;
5975     } else {
5976         dc= 0;
5977         start_i = 0;
5978         qmat = s->q_inter_matrix[qscale];
5979         length     = s->inter_ac_vlc_length;
5980         last_length= s->inter_ac_vlc_last_length;
5981     }
5982     last_non_zero = s->block_last_index[n];
5983
5984 #ifdef REFINE_STATS
5985 {START_TIMER
5986 #endif
5987     dc += (1<<(RECON_SHIFT-1));
5988     for(i=0; i<64; i++){
5989         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
5990     }
5991 #ifdef REFINE_STATS
5992 STOP_TIMER("memset rem[]")}
5993 #endif
5994     sum=0;
5995     for(i=0; i<64; i++){
5996         int one= 36;
5997         int qns=4;
5998         int w;
5999
6000         w= ABS(weight[i]) + qns*one;
6001         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6002
6003         weight[i] = w;
6004 //        w=weight[i] = (63*qns + (w/2)) / w;
6005
6006         assert(w>0);
6007         assert(w<(1<<6));
6008         sum += w*w;
6009     }
6010     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6011 #ifdef REFINE_STATS
6012 {START_TIMER
6013 #endif
6014     run=0;
6015     rle_index=0;
6016     for(i=start_i; i<=last_non_zero; i++){
6017         int j= perm_scantable[i];
6018         const int level= block[j];
6019         int coeff;
6020
6021         if(level){
6022             if(level<0) coeff= qmul*level - qadd;
6023             else        coeff= qmul*level + qadd;
6024             run_tab[rle_index++]=run;
6025             run=0;
6026
6027             s->dsp.add_8x8basis(rem, basis[j], coeff);
6028         }else{
6029             run++;
6030         }
6031     }
6032 #ifdef REFINE_STATS
6033 if(last_non_zero>0){
6034 STOP_TIMER("init rem[]")
6035 }
6036 }
6037
6038 {START_TIMER
6039 #endif
6040     for(;;){
6041         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6042         int best_coeff=0;
6043         int best_change=0;
6044         int run2, best_unquant_change=0, analyze_gradient;
6045 #ifdef REFINE_STATS
6046 {START_TIMER
6047 #endif
6048         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6049
6050         if(analyze_gradient){
6051 #ifdef REFINE_STATS
6052 {START_TIMER
6053 #endif
6054             for(i=0; i<64; i++){
6055                 int w= weight[i];
6056
6057                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6058             }
6059 #ifdef REFINE_STATS
6060 STOP_TIMER("rem*w*w")}
6061 {START_TIMER
6062 #endif
6063             s->dsp.fdct(d1);
6064 #ifdef REFINE_STATS
6065 STOP_TIMER("dct")}
6066 #endif
6067         }
6068
6069         if(start_i){
6070             const int level= block[0];
6071             int change, old_coeff;
6072
6073             assert(s->mb_intra);
6074
6075             old_coeff= q*level;
6076
6077             for(change=-1; change<=1; change+=2){
6078                 int new_level= level + change;
6079                 int score, new_coeff;
6080
6081                 new_coeff= q*new_level;
6082                 if(new_coeff >= 2048 || new_coeff < 0)
6083                     continue;
6084
6085                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6086                 if(score<best_score){
6087                     best_score= score;
6088                     best_coeff= 0;
6089                     best_change= change;
6090                     best_unquant_change= new_coeff - old_coeff;
6091                 }
6092             }
6093         }
6094
6095         run=0;
6096         rle_index=0;
6097         run2= run_tab[rle_index++];
6098         prev_level=0;
6099         prev_run=0;
6100
6101         for(i=start_i; i<64; i++){
6102             int j= perm_scantable[i];
6103             const int level= block[j];
6104             int change, old_coeff;
6105
6106             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6107                 break;
6108
6109             if(level){
6110                 if(level<0) old_coeff= qmul*level - qadd;
6111                 else        old_coeff= qmul*level + qadd;
6112                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6113             }else{
6114                 old_coeff=0;
6115                 run2--;
6116                 assert(run2>=0 || i >= last_non_zero );
6117             }
6118
6119             for(change=-1; change<=1; change+=2){
6120                 int new_level= level + change;
6121                 int score, new_coeff, unquant_change;
6122
6123                 score=0;
6124                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6125                    continue;
6126
6127                 if(new_level){
6128                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6129                     else            new_coeff= qmul*new_level + qadd;
6130                     if(new_coeff >= 2048 || new_coeff <= -2048)
6131                         continue;
6132                     //FIXME check for overflow
6133
6134                     if(level){
6135                         if(level < 63 && level > -63){
6136                             if(i < last_non_zero)
6137                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6138                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6139                             else
6140                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6141                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6142                         }
6143                     }else{
6144                         assert(ABS(new_level)==1);
6145
6146                         if(analyze_gradient){
6147                             int g= d1[ scantable[i] ];
6148                             if(g && (g^new_level) >= 0)
6149                                 continue;
6150                         }
6151
6152                         if(i < last_non_zero){
6153                             int next_i= i + run2 + 1;
6154                             int next_level= block[ perm_scantable[next_i] ] + 64;
6155
6156                             if(next_level&(~127))
6157                                 next_level= 0;
6158
6159                             if(next_i < last_non_zero)
6160                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6161                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6162                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6163                             else
6164                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6165                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6166                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6167                         }else{
6168                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6169                             if(prev_level){
6170                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6171                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6172                             }
6173                         }
6174                     }
6175                 }else{
6176                     new_coeff=0;
6177                     assert(ABS(level)==1);
6178
6179                     if(i < last_non_zero){
6180                         int next_i= i + run2 + 1;
6181                         int next_level= block[ perm_scantable[next_i] ] + 64;
6182
6183                         if(next_level&(~127))
6184                             next_level= 0;
6185
6186                         if(next_i < last_non_zero)
6187                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6188                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6189                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6190                         else
6191                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6192                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6193                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6194                     }else{
6195                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6196                         if(prev_level){
6197                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6198                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6199                         }
6200                     }
6201                 }
6202
6203                 score *= lambda;
6204
6205                 unquant_change= new_coeff - old_coeff;
6206                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6207
6208                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6209                 if(score<best_score){
6210                     best_score= score;
6211                     best_coeff= i;
6212                     best_change= change;
6213                     best_unquant_change= unquant_change;
6214                 }
6215             }
6216             if(level){
6217                 prev_level= level + 64;
6218                 if(prev_level&(~127))
6219                     prev_level= 0;
6220                 prev_run= run;
6221                 run=0;
6222             }else{
6223                 run++;
6224             }
6225         }
6226 #ifdef REFINE_STATS
6227 STOP_TIMER("iterative step")}
6228 #endif
6229
6230         if(best_change){
6231             int j= perm_scantable[ best_coeff ];
6232
6233             block[j] += best_change;
6234
6235             if(best_coeff > last_non_zero){
6236                 last_non_zero= best_coeff;
6237                 assert(block[j]);
6238 #ifdef REFINE_STATS
6239 after_last++;
6240 #endif
6241             }else{
6242 #ifdef REFINE_STATS
6243 if(block[j]){
6244     if(block[j] - best_change){
6245         if(ABS(block[j]) > ABS(block[j] - best_change)){
6246             raise++;
6247         }else{
6248             lower++;
6249         }
6250     }else{
6251         from_zero++;
6252     }
6253 }else{
6254     to_zero++;
6255 }
6256 #endif
6257                 for(; last_non_zero>=start_i; last_non_zero--){
6258                     if(block[perm_scantable[last_non_zero]])
6259                         break;
6260                 }
6261             }
6262 #ifdef REFINE_STATS
6263 count++;
6264 if(256*256*256*64 % count == 0){
6265     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6266 }
6267 #endif
6268             run=0;
6269             rle_index=0;
6270             for(i=start_i; i<=last_non_zero; i++){
6271                 int j= perm_scantable[i];
6272                 const int level= block[j];
6273
6274                  if(level){
6275                      run_tab[rle_index++]=run;
6276                      run=0;
6277                  }else{
6278                      run++;
6279                  }
6280             }
6281
6282             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6283         }else{
6284             break;
6285         }
6286     }
6287 #ifdef REFINE_STATS
6288 if(last_non_zero>0){
6289 STOP_TIMER("iterative search")
6290 }
6291 }
6292 #endif
6293
6294     return last_non_zero;
6295 }
6296
/**
 * Quantize one 8x8 block of DCT coefficients (C reference implementation).
 *
 * Runs the forward DCT on @p block in place, then quantizes the
 * coefficients using the precomputed quant matrix for @p qscale.
 * For intra blocks the DC coefficient is quantized separately with the
 * luma/chroma DC scale (or a fixed scale for H.263 AIC).
 *
 * @param s        encoder context (supplies DSP functions, matrices, biases)
 * @param block    64 DCTELEMs, spatial domain on entry, quantized levels on exit
 * @param n        block index; n < 4 selects luma, otherwise chroma DC scale
 * @param qscale   quantizer scale used to pick the quant matrix
 * @param overflow set to nonzero if some level may exceed s->max_qcoeff
 * @return index (in scan order) of the last non-zero coefficient,
 *         or -1 (inter) / 0 (intra) if all AC coefficients are zero
 */
static int dct_quantize_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block); /* forward DCT, in place */

    if(s->dct_error_sum) /* noise shaping enabled: feed back accumulated quantization error */
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q; /* rounded DC quantization */
        start_i = 1;          /* DC already handled, AC starts at 1 */
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;          /* inter: quantize all 64 coefficients uniformly */
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    /* |level*qmat| must exceed threshold1 for the coefficient to survive;
       the unsigned-add-compare below tests both signs in one branch */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* backward scan: find the last coefficient that quantizes to nonzero,
       zeroing everything after it so the forward loop below can stop early */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    /* forward scan: quantize coefficients up to last_non_zero */
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            max |=level; /* track magnitude bits for the overflow check */
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6376
6377 #endif //CONFIG_ENCODERS
6378
6379 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6380                                    DCTELEM *block, int n, int qscale)
6381 {
6382     int i, level, nCoeffs;
6383     const uint16_t *quant_matrix;
6384
6385     nCoeffs= s->block_last_index[n];
6386
6387     if (n < 4)
6388         block[0] = block[0] * s->y_dc_scale;
6389     else
6390         block[0] = block[0] * s->c_dc_scale;
6391     /* XXX: only mpeg1 */
6392     quant_matrix = s->intra_matrix;
6393     for(i=1;i<=nCoeffs;i++) {
6394         int j= s->intra_scantable.permutated[i];
6395         level = block[j];
6396         if (level) {
6397             if (level < 0) {
6398                 level = -level;
6399                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6400                 level = (level - 1) | 1;
6401                 level = -level;
6402             } else {
6403                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6404                 level = (level - 1) | 1;
6405             }
6406             block[j] = level;
6407         }
6408     }
6409 }
6410
6411 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6412                                    DCTELEM *block, int n, int qscale)
6413 {
6414     int i, level, nCoeffs;
6415     const uint16_t *quant_matrix;
6416
6417     nCoeffs= s->block_last_index[n];
6418
6419     quant_matrix = s->inter_matrix;
6420     for(i=0; i<=nCoeffs; i++) {
6421         int j= s->intra_scantable.permutated[i];
6422         level = block[j];
6423         if (level) {
6424             if (level < 0) {
6425                 level = -level;
6426                 level = (((level << 1) + 1) * qscale *
6427                          ((int) (quant_matrix[j]))) >> 4;
6428                 level = (level - 1) | 1;
6429                 level = -level;
6430             } else {
6431                 level = (((level << 1) + 1) * qscale *
6432                          ((int) (quant_matrix[j]))) >> 4;
6433                 level = (level - 1) | 1;
6434             }
6435             block[j] = level;
6436         }
6437     }
6438 }
6439
6440 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6441                                    DCTELEM *block, int n, int qscale)
6442 {
6443     int i, level, nCoeffs;
6444     const uint16_t *quant_matrix;
6445
6446     if(s->alternate_scan) nCoeffs= 63;
6447     else nCoeffs= s->block_last_index[n];
6448
6449     if (n < 4)
6450         block[0] = block[0] * s->y_dc_scale;
6451     else
6452         block[0] = block[0] * s->c_dc_scale;
6453     quant_matrix = s->intra_matrix;
6454     for(i=1;i<=nCoeffs;i++) {
6455         int j= s->intra_scantable.permutated[i];
6456         level = block[j];
6457         if (level) {
6458             if (level < 0) {
6459                 level = -level;
6460                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6461                 level = -level;
6462             } else {
6463                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6464             }
6465             block[j] = level;
6466         }
6467     }
6468 }
6469
6470 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6471                                    DCTELEM *block, int n, int qscale)
6472 {
6473     int i, level, nCoeffs;
6474     const uint16_t *quant_matrix;
6475     int sum=-1;
6476
6477     if(s->alternate_scan) nCoeffs= 63;
6478     else nCoeffs= s->block_last_index[n];
6479
6480     quant_matrix = s->inter_matrix;
6481     for(i=0; i<=nCoeffs; i++) {
6482         int j= s->intra_scantable.permutated[i];
6483         level = block[j];
6484         if (level) {
6485             if (level < 0) {
6486                 level = -level;
6487                 level = (((level << 1) + 1) * qscale *
6488                          ((int) (quant_matrix[j]))) >> 4;
6489                 level = -level;
6490             } else {
6491                 level = (((level << 1) + 1) * qscale *
6492                          ((int) (quant_matrix[j]))) >> 4;
6493             }
6494             block[j] = level;
6495             sum+=level;
6496         }
6497     }
6498     block[63]^=sum&1;
6499 }
6500
6501 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6502                                   DCTELEM *block, int n, int qscale)
6503 {
6504     int i, level, qmul, qadd;
6505     int nCoeffs;
6506
6507     assert(s->block_last_index[n]>=0);
6508
6509     qmul = qscale << 1;
6510
6511     if (!s->h263_aic) {
6512         if (n < 4)
6513             block[0] = block[0] * s->y_dc_scale;
6514         else
6515             block[0] = block[0] * s->c_dc_scale;
6516         qadd = (qscale - 1) | 1;
6517     }else{
6518         qadd = 0;
6519     }
6520     if(s->ac_pred)
6521         nCoeffs=63;
6522     else
6523         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6524
6525     for(i=1; i<=nCoeffs; i++) {
6526         level = block[i];
6527         if (level) {
6528             if (level < 0) {
6529                 level = level * qmul - qadd;
6530             } else {
6531                 level = level * qmul + qadd;
6532             }
6533             block[i] = level;
6534         }
6535     }
6536 }
6537
6538 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6539                                   DCTELEM *block, int n, int qscale)
6540 {
6541     int i, level, qmul, qadd;
6542     int nCoeffs;
6543
6544     assert(s->block_last_index[n]>=0);
6545
6546     qadd = (qscale - 1) | 1;
6547     qmul = qscale << 1;
6548
6549     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6550
6551     for(i=0; i<=nCoeffs; i++) {
6552         level = block[i];
6553         if (level) {
6554             if (level < 0) {
6555                 level = level * qmul - qadd;
6556             } else {
6557                 level = level * qmul + qadd;
6558             }
6559             block[i] = level;
6560         }
6561     }
6562 }
6563
6564 #ifdef CONFIG_ENCODERS
6565 AVCodec h263_encoder = {
6566     "h263",
6567     CODEC_TYPE_VIDEO,
6568     CODEC_ID_H263,
6569     sizeof(MpegEncContext),
6570     MPV_encode_init,
6571     MPV_encode_picture,
6572     MPV_encode_end,
6573     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6574 };
6575
6576 AVCodec h263p_encoder = {
6577     "h263p",
6578     CODEC_TYPE_VIDEO,
6579     CODEC_ID_H263P,
6580     sizeof(MpegEncContext),
6581     MPV_encode_init,
6582     MPV_encode_picture,
6583     MPV_encode_end,
6584     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6585 };
6586
6587 AVCodec flv_encoder = {
6588     "flv",
6589     CODEC_TYPE_VIDEO,
6590     CODEC_ID_FLV1,
6591     sizeof(MpegEncContext),
6592     MPV_encode_init,
6593     MPV_encode_picture,
6594     MPV_encode_end,
6595     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6596 };
6597
6598 AVCodec rv10_encoder = {
6599     "rv10",
6600     CODEC_TYPE_VIDEO,
6601     CODEC_ID_RV10,
6602     sizeof(MpegEncContext),
6603     MPV_encode_init,
6604     MPV_encode_picture,
6605     MPV_encode_end,
6606     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6607 };
6608
6609 AVCodec rv20_encoder = {
6610     "rv20",
6611     CODEC_TYPE_VIDEO,
6612     CODEC_ID_RV20,
6613     sizeof(MpegEncContext),
6614     MPV_encode_init,
6615     MPV_encode_picture,
6616     MPV_encode_end,
6617     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6618 };
6619
6620 AVCodec mpeg4_encoder = {
6621     "mpeg4",
6622     CODEC_TYPE_VIDEO,
6623     CODEC_ID_MPEG4,
6624     sizeof(MpegEncContext),
6625     MPV_encode_init,
6626     MPV_encode_picture,
6627     MPV_encode_end,
6628     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6629     .capabilities= CODEC_CAP_DELAY,
6630 };
6631
6632 AVCodec msmpeg4v1_encoder = {
6633     "msmpeg4v1",
6634     CODEC_TYPE_VIDEO,
6635     CODEC_ID_MSMPEG4V1,
6636     sizeof(MpegEncContext),
6637     MPV_encode_init,
6638     MPV_encode_picture,
6639     MPV_encode_end,
6640     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6641 };
6642
6643 AVCodec msmpeg4v2_encoder = {
6644     "msmpeg4v2",
6645     CODEC_TYPE_VIDEO,
6646     CODEC_ID_MSMPEG4V2,
6647     sizeof(MpegEncContext),
6648     MPV_encode_init,
6649     MPV_encode_picture,
6650     MPV_encode_end,
6651     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6652 };
6653
6654 AVCodec msmpeg4v3_encoder = {
6655     "msmpeg4",
6656     CODEC_TYPE_VIDEO,
6657     CODEC_ID_MSMPEG4V3,
6658     sizeof(MpegEncContext),
6659     MPV_encode_init,
6660     MPV_encode_picture,
6661     MPV_encode_end,
6662     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6663 };
6664
6665 AVCodec wmv1_encoder = {
6666     "wmv1",
6667     CODEC_TYPE_VIDEO,
6668     CODEC_ID_WMV1,
6669     sizeof(MpegEncContext),
6670     MPV_encode_init,
6671     MPV_encode_picture,
6672     MPV_encode_end,
6673     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6674 };
6675
6676 AVCodec mjpeg_encoder = {
6677     "mjpeg",
6678     CODEC_TYPE_VIDEO,
6679     CODEC_ID_MJPEG,
6680     sizeof(MpegEncContext),
6681     MPV_encode_init,
6682     MPV_encode_picture,
6683     MPV_encode_end,
6684     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6685 };
6686
6687 #endif //CONFIG_ENCODERS