]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
Add a newline to b_frame_strategy error message.
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
/* Scale factors for the AAN (Arai/Agui/Nakajima) fast DCT; entry [i] is the
 * post-scale for coefficient i, fixed-point with 14 fractional bits. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* Rounding table for h263 chroma motion vectors (index = 1/16 pel fraction). */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* Identity mapping from luma qscale to chroma qscale (the default). */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* portable C implementations; the arch-specific inits below may
     * override any of these function pointers */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* platform-specific overrides */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) quantizer as the fast variant
     * before optionally switching the main one to trellis quantization */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * Side-data arrays (qscale/mb_type/motion/...) are allocated lazily the
 * first time this Picture is used.
 * @return 0 on success, -1 on failure (allocation or get_buffer() error)
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* pixel data is owned by the caller; just tag the picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant over the whole sequence */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* first use of this Picture: allocate the side-data arrays */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264 stores motion vectors at 4x4 block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8 block granularity for the other codecs / debug output */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;
449     }
450 }
451
/* Allocate the per-thread scratch buffers of a (possibly duplicated)
 * MpegEncContext. Returns 0 on success, -1 on allocation failure (partial
 * allocations are released later via MPV_common_end()). */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks point into the block array (remapped for interlaced use) */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=
488     s->b_scratchpad=
489     s->obmc_scratchpad= NULL;
490
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs no defaults beyond the common ones */
    MPV_common_defaults(s);
}
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    /* one-time lazy init of the tables shared by all encoder instances.
     * NOTE(review): the 'done' latch is not thread-safe and the av_mallocz()
     * result is unchecked — presumably relied upon to be called early from a
     * single thread; confirm before using from multiple threads. */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* mvs in [-16,15] can always be coded with fcode 1 */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
606 #endif //CONFIG_ENCODERS
607
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 * @return 0 on success, -1 on error (bad dimensions, too many threads,
 *         or allocation failure — partial state is released via MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each thread needs at least one 16-pixel MB row to work on */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derive macroblock geometry; strides have +1 slack for edge handling */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* blocks 0-3 are luma (8x8 grid), 4-5 are chroma (MB grid) */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* raster mb index -> stride-based mb index mapping */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers skip the first (edge) row/column */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 reuses the main context; the others get a copy plus their own
     * scratch buffers and a contiguous slice of MB rows to work on */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free everything allocated by MPV_common_init(); shared by encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* tear down per-thread duplicate contexts; thread_context[0] is s itself,
       so only the contexts of threads >= 1 are actually freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: free the *_base allocations and clear the
       offset aliases that pointed into them */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction / coded-block state (H.263-style codecs) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear the picture pointers so no stale reference survives teardown */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
/**
 * init video encoder
 * Validates the user-supplied AVCodecContext parameters, copies them into
 * the MpegEncContext, performs per-codec setup (output format, delay,
 * quantizer biases, matrices) and initializes the common state and the
 * rate controller.
 * @return 0 on success, -1 on invalid parameters or allocation failure
 */
int MPV_encode_init(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;
    int i;
    int chroma_h_shift, chroma_v_shift;

    MPV_encode_defaults(s);

    /* pixel format checks: only 4:2:0 is handled by this encoder */
    if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
        av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
        return -1;
    }

    if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
            return -1;
        }
    }else{
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
            return -1;
        }
    }

    /* copy the user parameters into the encoding context */
    s->bit_rate = avctx->bit_rate;
    s->width = avctx->width;
    s->height = avctx->height;
    if(avctx->gop_size > 600){
        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
        avctx->gop_size=600;
    }
    s->gop_size = avctx->gop_size;
    s->avctx = avctx;
    s->flags= avctx->flags;
    s->flags2= avctx->flags2;
    s->max_b_frames= avctx->max_b_frames;
    s->codec_id= avctx->codec->id;
    s->luma_elim_threshold  = avctx->luma_elim_threshold;
    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
    s->strict_std_compliance= avctx->strict_std_compliance;
    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
    s->mpeg_quant= avctx->mpeg_quant;
    s->rtp_mode= !!avctx->rtp_payload_size;
    s->intra_dc_precision= avctx->intra_dc_precision;
    s->user_specified_pts = AV_NOPTS_VALUE;

    /* gop_size <= 1 means "all intra"; 12 is then only a nominal value */
    if (s->gop_size <= 1) {
        s->intra_only = 1;
        s->gop_size = 12;
    } else {
        s->intra_only = 0;
    }

    s->me_method = avctx->me_method;

    /* Fixed QSCALE */
    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);

    /* adaptive quantization only makes sense when qscale is not fixed and
       at least one masking/QP-RD option asks for per-MB quantizers */
    s->adaptive_quant= (   s->avctx->lumi_masking
                        || s->avctx->dark_masking
                        || s->avctx->temporal_cplx_masking
                        || s->avctx->spatial_cplx_masking
                        || s->avctx->p_masking
                        || s->avctx->border_masking
                        || (s->flags&CODEC_FLAG_QP_RD))
                       && !s->fixed_qscale;

    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);

    /* rate-control parameter sanity checks */
    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
        return -1;
    }

    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
    }

    if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
        av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
        return -1;
    }

    if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
        return -1;
    }

    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
       && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
       && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){

        av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
    }

    /* reject flag combinations the selected codec cannot represent */
    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
        return -1;
    }

    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
        return -1;
    }

    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
        return -1;
    }

    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
        return -1;
    }

    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
        return -1;
    }

    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
        return -1;
    }

    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
       && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
        return -1;
    }

    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
        return -1;
    }

    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
        return -1;
    }

    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
        return -1;
    }

    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
        return -1;
    }

    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
       && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
        av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
        return -1;
    }

    /* slice-threaded encoding needs independently decodable slices */
    if(s->avctx->thread_count > 1)
        s->rtp_mode= 1;

    if(!avctx->time_base.den || !avctx->time_base.num){
        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
        return -1;
    }

    /* thresholds are stored scaled by 256 internally; cap them so the
       scaled value cannot overflow an int */
    i= (INT_MAX/2+128)>>8;
    if(avctx->me_threshold >= i){
        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
        return -1;
    }
    if(avctx->mb_threshold >= i){
        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
        return -1;
    }

    if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
        return -1;
    }

    i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
    if(i > 1){
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
        avctx->time_base.den /= i;
        avctx->time_base.num /= i;
//        return -1;
    }

    /* default quantizer rounding biases; can be overridden by the user below */
    if(s->codec_id==CODEC_ID_MJPEG){
        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
        s->inter_quant_bias= 0;
    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
        s->inter_quant_bias= 0;
    }else{
        s->intra_quant_bias=0;
        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
    }

    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
        s->intra_quant_bias= avctx->intra_quant_bias;
    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
        s->inter_quant_bias= avctx->inter_quant_bias;

    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);

    if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
        av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
        return -1;
    }
    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;

    /* per-codec setup: output format, delay and codec-specific modes */
    switch(avctx->codec->id) {
    case CODEC_ID_MPEG1VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case CODEC_ID_MPEG2VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
        s->rtp_mode= 1;
        break;
    case CODEC_ID_LJPEG:
    case CODEC_ID_JPEGLS:
    case CODEC_ID_MJPEG:
        s->out_format = FMT_MJPEG;
        s->intra_only = 1; /* force intra only for jpeg */
        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
        s->mjpeg_vsample[1] = 1;
        s->mjpeg_vsample[2] = 1;
        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
        s->mjpeg_hsample[1] = 1;
        s->mjpeg_hsample[2] = 1;
        if (mjpeg_init(s) < 0)
            return -1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_H261:
        s->out_format = FMT_H261;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_H263:
        if (h263_get_picture_format(s->width, s->height) == 7) {
            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
            return -1;
        }
        s->out_format = FMT_H263;
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_H263P:
        s->out_format = FMT_H263;
        s->h263_plus = 1;
        /* Fx */
        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
        s->modified_quant= s->h263_aic;
        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
        s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;

        /* /Fx */
        /* These are just to be sure */
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_FLV1:
        s->out_format = FMT_H263;
        s->h263_flv = 2; /* format = 1; 11-bit codes */
        s->unrestricted_mv = 1;
        s->rtp_mode=0; /* don't allow GOB */
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_RV10:
        s->out_format = FMT_H263;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_RV20:
        s->out_format = FMT_H263;
        avctx->delay=0;
        s->low_delay=1;
        s->modified_quant=1;
        s->h263_aic=1;
        s->h263_plus=1;
        s->loop_filter=1;
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
        break;
    case CODEC_ID_MPEG4:
        s->out_format = FMT_H263;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->low_delay= s->max_b_frames ? 0 : 1;
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case CODEC_ID_MSMPEG4V1:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_MSMPEG4V2:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 2;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_MSMPEG4V3:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 3;
        s->flipflop_rounding=1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_WMV1:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 4;
        s->flipflop_rounding=1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_WMV2:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 5;
        s->flipflop_rounding=1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    default:
        return -1;
    }

    avctx->has_b_frames= !s->low_delay;

    s->encoding = 1;

    /* init */
    if (MPV_common_init(s) < 0)
        return -1;

    if(s->modified_quant)
        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
    s->progressive_frame=
    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
    s->quant_precision=5;

    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);

#ifdef CONFIG_H261_ENCODER
    if (s->out_format == FMT_H261)
        ff_h261_encode_init(s);
#endif
    if (s->out_format == FMT_H263)
        h263_encode_init(s);
    if(s->msmpeg4_version)
        ff_msmpeg4_encode_init(s);
    if (s->out_format == FMT_MPEG1)
        ff_mpeg1_encode_init(s);

    /* init q matrix */
    for(i=0;i<64;i++) {
        int j= s->dsp.idct_permutation[i];
        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
        }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
            s->intra_matrix[j] =
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }else
        { /* mpeg1/2 */
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }
        if(s->avctx->intra_matrix)
            s->intra_matrix[j] = s->avctx->intra_matrix[i];
        if(s->avctx->inter_matrix)
            s->inter_matrix[j] = s->avctx->inter_matrix[i];
    }

    /* precompute matrix */
    /* for mjpeg, we do include qscale in the matrix */
    if (s->out_format != FMT_MJPEG) {
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
                       s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
    }

    if(ff_rate_control_init(s) < 0)
        return -1;

    return 0;
}
1312
1313 int MPV_encode_end(AVCodecContext *avctx)
1314 {
1315     MpegEncContext *s = avctx->priv_data;
1316
1317 #ifdef STATS
1318     print_stats();
1319 #endif
1320
1321     ff_rate_control_uninit(s);
1322
1323     MPV_common_end(s);
1324     if (s->out_format == FMT_MJPEG)
1325         mjpeg_close(s);
1326
1327     av_freep(&avctx->extradata);
1328
1329     return 0;
1330 }
1331
1332 #endif //CONFIG_ENCODERS
1333
1334 void init_rl(RLTable *rl, int use_static)
1335 {
1336     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1337     uint8_t index_run[MAX_RUN+1];
1338     int last, run, level, start, end, i;
1339
1340     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1341     if(use_static && rl->max_level[0])
1342         return;
1343
1344     /* compute max_level[], max_run[] and index_run[] */
1345     for(last=0;last<2;last++) {
1346         if (last == 0) {
1347             start = 0;
1348             end = rl->last;
1349         } else {
1350             start = rl->last;
1351             end = rl->n;
1352         }
1353
1354         memset(max_level, 0, MAX_RUN + 1);
1355         memset(max_run, 0, MAX_LEVEL + 1);
1356         memset(index_run, rl->n, MAX_RUN + 1);
1357         for(i=start;i<end;i++) {
1358             run = rl->table_run[i];
1359             level = rl->table_level[i];
1360             if (index_run[run] == rl->n)
1361                 index_run[run] = i;
1362             if (level > max_level[run])
1363                 max_level[run] = level;
1364             if (run > max_run[level])
1365                 max_run[level] = run;
1366         }
1367         if(use_static)
1368             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1369         else
1370             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1371         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1372         if(use_static)
1373             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1374         else
1375             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1376         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1377         if(use_static)
1378             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1379         else
1380             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1381         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1382     }
1383 }
1384
/**
 * Replicates the border pixels of an image outward into a margin of
 * width 'w' on all four sides (used so that motion compensation may read
 * outside the picture).
 * @param buf    top-left pixel of the image proper; at least w extra
 *               rows/columns must be addressable around it
 * @param wrap   stride/linesize of the buffer
 * @param width  image width in pixels
 * @param height image height in pixels
 * @param w      margin width in pixels
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int y;

    /* replicate the first row upward and the last row downward */
    for (y = 1; y <= w; y++) {
        memcpy(buf - y * wrap, buf, width);
        memcpy(bottom + y * wrap, bottom, width);
    }

    /* replicate the first and last column of every row sideways */
    row = buf;
    for (y = 0; y < height; y++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four corner areas with the corner pixels */
    for (y = 1; y <= w; y++) {
        memset(buf - y * wrap - w, buf[0], w);                    /* top left */
        memset(buf - y * wrap + width, buf[width - 1], w);        /* top right */
        memset(bottom + y * wrap - w, bottom[0], w);              /* bottom left */
        memset(bottom + y * wrap + width, bottom[width - 1], w);  /* bottom right */
    }
}
1413
1414 int ff_find_unused_picture(MpegEncContext *s, int shared){
1415     int i;
1416
1417     if(shared){
1418         for(i=0; i<MAX_PICTURE_COUNT; i++){
1419             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1420         }
1421     }else{
1422         for(i=0; i<MAX_PICTURE_COUNT; i++){
1423             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1424         }
1425         for(i=0; i<MAX_PICTURE_COUNT; i++){
1426             if(s->picture[i].data[0]==NULL) return i;
1427         }
1428     }
1429
1430     assert(0);
1431     return -1;
1432 }
1433
1434 static void update_noise_reduction(MpegEncContext *s){
1435     int intra, i;
1436
1437     for(intra=0; intra<2; intra++){
1438         if(s->dct_count[intra] > (1<<16)){
1439             for(i=0; i<64; i++){
1440                 s->dct_error_sum[intra][i] >>=1;
1441             }
1442             s->dct_count[intra] >>= 1;
1443         }
1444
1445         for(i=0; i<64; i++){
1446             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1447         }
1448     }
1449 }
1450
/**
 * generic function for encode/decode called after coding/decoding the header
 * and before a frame is coded/decoded.
 * Releases stale reference frames, obtains a buffer for the frame about to
 * be coded/decoded, rotates the last/next/current picture pointers and
 * selects the dequantizer matching the current codec.
 * @return 0 on success, -1 if no picture buffer could be allocated
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should
                   not exist at this point — free it and report */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* reference==3 marks frames that later pictures may predict from;
           dropable frames and (non-H264) B frames are never referenced */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    /* rotate the reference pictures: the previous "next" becomes "last",
       and a non-dropable non-B frame becomes the new "next" */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a P frame without a usable reference: warn and allocate another
       buffer so decoding can continue anyway */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: offset into the selected field and double the
       strides so the rest of the code sees a single-field image */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1577
/* generic function for encode/decode called after a frame has been coded/decoded;
   pads the edges of reference frames, updates rate-control history and,
   when encoding, releases buffers of non-reference frames */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes are half-size in both directions, hence the
               halved edge positions and edge width */
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember type and lambda of this frame for later decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1628
1629 /**
1630  * draws an line from (ex, ey) -> (sx, sy).
1631  * @param w width of the image
1632  * @param h height of the image
1633  * @param stride stride/linesize of the image
1634  * @param color color of the arrow
1635  */
1636 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1637     int t, x, y, fr, f;
1638
1639     sx= clip(sx, 0, w-1);
1640     sy= clip(sy, 0, h-1);
1641     ex= clip(ex, 0, w-1);
1642     ey= clip(ey, 0, h-1);
1643
1644     buf[sy*stride + sx]+= color;
1645
1646     if(ABS(ex - sx) > ABS(ey - sy)){
1647         if(sx > ex){
1648             t=sx; sx=ex; ex=t;
1649             t=sy; sy=ey; ey=t;
1650         }
1651         buf+= sx + sy*stride;
1652         ex-= sx;
1653         f= ((ey-sy)<<16)/ex;
1654         for(x= 0; x <= ex; x++){
1655             y = (x*f)>>16;
1656             fr= (x*f)&0xFFFF;
1657             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1658             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1659         }
1660     }else{
1661         if(sy > ey){
1662             t=sx; sx=ex; ex=t;
1663             t=sy; sy=ey; ey=t;
1664         }
1665         buf+= sx + sy*stride;
1666         ey-= sy;
1667         if(ey) f= ((ex-sx)<<16)/ey;
1668         else   f= 0;
1669         for(y= 0; y <= ey; y++){
1670             x = (y*f)>>16;
1671             fr= (y*f)&0xFFFF;
1672             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1673             buf[y*stride + x+1]+= (color*         fr )>>16;;
1674         }
1675     }
1676 }
1677
1678 /**
1679  * draws an arrow from (ex, ey) -> (sx, sy).
1680  * @param w width of the image
1681  * @param h height of the image
1682  * @param stride stride/linesize of the image
1683  * @param color color of the arrow
1684  */
1685 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1686     int dx,dy;
1687
1688     sx= clip(sx, -100, w+100);
1689     sy= clip(sy, -100, h+100);
1690     ex= clip(ex, -100, w+100);
1691     ey= clip(ey, -100, h+100);
1692
1693     dx= ex - sx;
1694     dy= ey - sy;
1695
1696     if(dx*dx + dy*dy > 3*3){
1697         int rx=  dx + dy;
1698         int ry= -dx + dy;
1699         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1700
1701         //FIXME subpixel accuracy
1702         rx= ROUNDED_DIV(rx*3<<4, length);
1703         ry= ROUNDED_DIV(ry*3<<4, length);
1704
1705         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1706         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1707     }
1708     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1709 }
1710
/**
 * Prints debugging info for the given picture: depending on the debug flags,
 * a textual per-macroblock dump (skip count, qscale, MB type) is logged
 * and/or motion vectors, QP and MB types are drawn into a copy of the
 * picture (visualization).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* ---- textual per-macroblock dump ---- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, clamped to one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* ---- graphical visualization drawn into a copy of the picture ---- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        /* motion_val granularity: 0 = per-MB ... up to per-4x4 blocks */
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* draw into a private copy so the decoding buffers stay untouched */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vectors: one pass per requested type (P fwd, B fwd, B bwd) */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: paint chroma with a gray level proportional to qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB-type visualization: color-code the chroma planes per type */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* pick chroma (u, v) on a color circle: hue theta (degrees), saturation r */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1971
1972 #ifdef CONFIG_ENCODERS
1973
1974 static int get_sae(uint8_t *src, int ref, int stride){
1975     int x,y;
1976     int acc=0;
1977
1978     for(y=0; y<16; y++){
1979         for(x=0; x<16; x++){
1980             acc+= ABS(src[x+y*stride] - ref);
1981         }
1982     }
1983
1984     return acc;
1985 }
1986
1987 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1988     int x, y, w, h;
1989     int acc=0;
1990
1991     w= s->width &~15;
1992     h= s->height&~15;
1993
1994     for(y=0; y<h; y+=16){
1995         for(x=0; x<w; x+=16){
1996             int offset= x + y*stride;
1997             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1998             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1999             int sae = get_sae(src + offset, mean, stride);
2000
2001             acc+= sae + 500 < sad;
2002         }
2003     }
2004     return acc;
2005 }
2006
2007
/**
 * Queues one input frame for encoding: validates/guesses its pts, copies or
 * wraps its data into an internal Picture and appends it to the reorder
 * buffer at position max_b_frames.
 * A NULL pic_arg flushes: the queue is only shifted and a NULL is appended.
 * @return 0 on success, -1 on a non-monotone timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;  /* try to reference the user's buffer instead of copying */

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly increasing */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: extrapolate from the last one, or fall back to
               the display picture number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct mode needs the caller to preserve the buffer and its strides
       to match ours exactly */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        /* wrap the user's data pointers in a shared Picture, no copy */
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        /* if the user already wrote into our buffer (offset by the edge),
           the copy can be skipped */
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                /* whole-plane copy when strides match, row by row otherwise */
                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2110
2111 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2112     int x, y, plane;
2113     int score=0;
2114     int64_t score64=0;
2115
2116     for(plane=0; plane<3; plane++){
2117         const int stride= p->linesize[plane];
2118         const int bw= plane ? 1 : 2;
2119         for(y=0; y<s->mb_height*bw; y++){
2120             for(x=0; x<s->mb_width*bw; x++){
2121                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2122                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2123
2124                 switch(s->avctx->frame_skip_exp){
2125                     case 0: score= FFMAX(score, v); break;
2126                     case 1: score+= ABS(v);break;
2127                     case 2: score+= v*v;break;
2128                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2129                     case 4: score64+= v*v*(int64_t)(v*v);break;
2130                 }
2131             }
2132         }
2133     }
2134
2135     if(score) score64= score;
2136
2137     if(score64 < s->avctx->frame_skip_threshold)
2138         return 1;
2139     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2140         return 1;
2141     return 0;
2142 }
2143
2144 static int estimate_best_b_count(MpegEncContext *s){
2145     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2146     AVCodecContext *c= avcodec_alloc_context();
2147     AVFrame input[FF_MAX_B_FRAMES+2];
2148     const int scale= s->avctx->brd_scale;
2149     int i, j, out_size, p_lambda, b_lambda, lambda2;
2150     int outbuf_size= s->width * s->height; //FIXME
2151     uint8_t *outbuf= av_malloc(outbuf_size);
2152     ImgReSampleContext *resample;
2153     int64_t best_rd= INT64_MAX;
2154     int best_b_count= -1;
2155
2156 //    emms_c();
2157     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2158     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2159     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2160     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2161
2162     c->width = s->width >> scale;
2163     c->height= s->height>> scale;
2164     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2165     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2166     c->mb_decision= s->avctx->mb_decision;
2167     c->me_cmp= s->avctx->me_cmp;
2168     c->mb_cmp= s->avctx->mb_cmp;
2169     c->me_sub_cmp= s->avctx->me_sub_cmp;
2170     c->pix_fmt = PIX_FMT_YUV420P;
2171     c->time_base= s->avctx->time_base;
2172     c->max_b_frames= s->max_b_frames;
2173
2174     if (avcodec_open(c, codec) < 0)
2175         return -1;
2176
2177     resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
2178
2179     for(i=0; i<s->max_b_frames+2; i++){
2180         int ysize= c->width*c->height;
2181         int csize= (c->width/2)*(c->height/2);
2182         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2183
2184         if(pre_input_ptr)
2185             pre_input= *pre_input_ptr;
2186
2187         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2188             pre_input.data[0]+=INPLACE_OFFSET;
2189             pre_input.data[1]+=INPLACE_OFFSET;
2190             pre_input.data[2]+=INPLACE_OFFSET;
2191         }
2192
2193         avcodec_get_frame_defaults(&input[i]);
2194         input[i].data[0]= av_malloc(ysize + 2*csize);
2195         input[i].data[1]= input[i].data[0] + ysize;
2196         input[i].data[2]= input[i].data[1] + csize;
2197         input[i].linesize[0]= c->width;
2198         input[i].linesize[1]=
2199         input[i].linesize[2]= c->width/2;
2200
2201         if(!i || s->input_picture[i-1])
2202             img_resample(resample, &input[i], &pre_input);
2203     }
2204
2205     for(j=0; j<s->max_b_frames+1; j++){
2206         int64_t rd=0;
2207
2208         if(!s->input_picture[j])
2209             break;
2210
2211         c->error[0]= c->error[1]= c->error[2]= 0;
2212
2213         input[0].pict_type= I_TYPE;
2214         input[0].quality= 1 * FF_QP2LAMBDA;
2215         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2216 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2217
2218         for(i=0; i<s->max_b_frames+1; i++){
2219             int is_p= i % (j+1) == j || i==s->max_b_frames;
2220
2221             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2222             input[i+1].quality= is_p ? p_lambda : b_lambda;
2223             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2224             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2225         }
2226
2227         /* get the delayed frames */
2228         while(out_size){
2229             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2230             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2231         }
2232
2233         rd += c->error[0] + c->error[1] + c->error[2];
2234
2235         if(rd < best_rd){
2236             best_rd= rd;
2237             best_b_count= j;
2238         }
2239     }
2240
2241     av_freep(&outbuf);
2242     avcodec_close(c);
2243     av_freep(&c);
2244     img_resample_close(resample);
2245
2246     for(i=0; i<s->max_b_frames+2; i++){
2247         av_freep(&input[i].data[0]);
2248     }
2249
2250     return best_b_count;
2251 }
2252
2253 static void select_input_picture(MpegEncContext *s){
2254     int i;
2255
2256     for(i=1; i<MAX_PICTURE_COUNT; i++)
2257         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2258     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2259
2260     /* set next picture type & ordering */
2261     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2262         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2263             s->reordered_input_picture[0]= s->input_picture[0];
2264             s->reordered_input_picture[0]->pict_type= I_TYPE;
2265             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2266         }else{
2267             int b_frames;
2268
2269             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2270                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2271                 //FIXME check that te gop check above is +-1 correct
2272 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2273
2274                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2275                         for(i=0; i<4; i++)
2276                             s->input_picture[0]->data[i]= NULL;
2277                         s->input_picture[0]->type= 0;
2278                     }else{
2279                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2280                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2281
2282                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2283                     }
2284
2285                     emms_c();
2286                     ff_vbv_update(s, 0);
2287
2288                     goto no_output_pic;
2289                 }
2290             }
2291
2292             if(s->flags&CODEC_FLAG_PASS2){
2293                 for(i=0; i<s->max_b_frames+1; i++){
2294                     int pict_num= s->input_picture[0]->display_picture_number + i;
2295
2296                     if(pict_num >= s->rc_context.num_entries)
2297                         break;
2298                     if(!s->input_picture[i]){
2299                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2300                         break;
2301                     }
2302
2303                     s->input_picture[i]->pict_type=
2304                         s->rc_context.entry[pict_num].new_pict_type;
2305                 }
2306             }
2307
2308             if(s->avctx->b_frame_strategy==0){
2309                 b_frames= s->max_b_frames;
2310                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2311             }else if(s->avctx->b_frame_strategy==1){
2312                 for(i=1; i<s->max_b_frames+1; i++){
2313                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2314                         s->input_picture[i]->b_frame_score=
2315                             get_intra_count(s, s->input_picture[i  ]->data[0],
2316                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2317                     }
2318                 }
2319                 for(i=0; i<s->max_b_frames+1; i++){
2320                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2321                 }
2322
2323                 b_frames= FFMAX(0, i-1);
2324
2325                 /* reset scores */
2326                 for(i=0; i<b_frames+1; i++){
2327                     s->input_picture[i]->b_frame_score=0;
2328                 }
2329             }else if(s->avctx->b_frame_strategy==2){
2330                 b_frames= estimate_best_b_count(s);
2331             }else{
2332                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2333                 b_frames=0;
2334             }
2335
2336             emms_c();
2337 //static int b_count=0;
2338 //b_count+= b_frames;
2339 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2340
2341             for(i= b_frames - 1; i>=0; i--){
2342                 int type= s->input_picture[i]->pict_type;
2343                 if(type && type != B_TYPE)
2344                     b_frames= i;
2345             }
2346             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2347                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2348             }
2349
2350             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2351               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2352                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2353               }else{
2354                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2355                     b_frames=0;
2356                 s->input_picture[b_frames]->pict_type= I_TYPE;
2357               }
2358             }
2359
2360             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2361                && b_frames
2362                && s->input_picture[b_frames]->pict_type== I_TYPE)
2363                 b_frames--;
2364
2365             s->reordered_input_picture[0]= s->input_picture[b_frames];
2366             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2367                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2368             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2369             for(i=0; i<b_frames; i++){
2370                 s->reordered_input_picture[i+1]= s->input_picture[i];
2371                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2372                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2373             }
2374         }
2375     }
2376 no_output_pic:
2377     if(s->reordered_input_picture[0]){
2378         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2379
2380         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2381
2382         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2383             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2384
2385             int i= ff_find_unused_picture(s, 0);
2386             Picture *pic= &s->picture[i];
2387
2388             /* mark us unused / free shared pic */
2389             for(i=0; i<4; i++)
2390                 s->reordered_input_picture[0]->data[i]= NULL;
2391             s->reordered_input_picture[0]->type= 0;
2392
2393             pic->reference              = s->reordered_input_picture[0]->reference;
2394
2395             alloc_picture(s, pic, 0);
2396
2397             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2398
2399             s->current_picture_ptr= pic;
2400         }else{
2401             // input is not a shared pix -> reuse buffer for current_pix
2402
2403             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2404                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2405
2406             s->current_picture_ptr= s->reordered_input_picture[0];
2407             for(i=0; i<4; i++){
2408                 s->new_picture.data[i]+= INPLACE_OFFSET;
2409             }
2410         }
2411         copy_picture(&s->current_picture, s->current_picture_ptr);
2412
2413         s->picture_number= s->new_picture.display_picture_number;
2414 //printf("dpn:%d\n", s->picture_number);
2415     }else{
2416        memset(&s->new_picture, 0, sizeof(Picture));
2417     }
2418 }
2419
/**
 * Encoder entry point: encodes one frame and writes the coded bitstream
 * into buf.
 *
 * Input frames may be buffered internally for B-frame reordering, so a
 * call can produce no output yet (it then returns 0 bytes).
 *
 * @param avctx codec context; avctx->priv_data is the MpegEncContext
 * @param buf output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data input AVFrame to encode
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* give each slice thread its proportional share of the output buffer,
       split by macroblock rows */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the picture to code now (handles B-frame reordering and
       frame-skip decisions) */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics to the codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        /* accumulate the reconstruction error (PSNR statistics) */
        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* the VBV model may request stuffing bytes to avoid buffer overflow */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: emit a 0x000001C3 start code, then 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay value into the already-written
               picture header; the field straddles 3 bytes, shifted by 3 bits
               (assumes vbv_delay_ptr was set while writing the header) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no frame produced (input only buffered for reordering);
           nothing may have been written to the bitstream */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2541
2542 #endif //CONFIG_ENCODERS
2543
/**
 * MPEG-4 global motion compensation for one macroblock in the
 * single-warp-point case (gmc1): one translational sprite offset applied
 * through dsp.gmc1. Writes the predicted 16x16 luma block and, unless
 * CODEC_FLAG_GRAY is set, the two 8x8 chroma blocks.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* luma: integer source position; sprite_offset carries
       sprite_warping_accuracy+1 fractional bits */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale the fractional part to the fixed scale used below (&15) */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            /* the 17x17 read area may leave the padded picture: use a copy
               with replicated borders instead */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* fractional position not on the half-pel grid: gmc1 interpolation,
           done as two 8-wide halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* position is on the half-pel grid: a plain half-pel copy suffices */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same derivation at half resolution with sprite_offset[1] */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;  /* Cr below needs the same edge emulation as Cb */
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2629
2630 static inline void gmc_motion(MpegEncContext *s,
2631                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2632                                uint8_t **ref_picture)
2633 {
2634     uint8_t *ptr;
2635     int linesize, uvlinesize;
2636     const int a= s->sprite_warping_accuracy;
2637     int ox, oy;
2638
2639     linesize = s->linesize;
2640     uvlinesize = s->uvlinesize;
2641
2642     ptr = ref_picture[0];
2643
2644     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2645     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2646
2647     s->dsp.gmc(dest_y, ptr, linesize, 16,
2648            ox,
2649            oy,
2650            s->sprite_delta[0][0], s->sprite_delta[0][1],
2651            s->sprite_delta[1][0], s->sprite_delta[1][1],
2652            a+1, (1<<(2*a+1)) - s->no_rounding,
2653            s->h_edge_pos, s->v_edge_pos);
2654     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2655            ox + s->sprite_delta[0][0]*8,
2656            oy + s->sprite_delta[1][0]*8,
2657            s->sprite_delta[0][0], s->sprite_delta[0][1],
2658            s->sprite_delta[1][0], s->sprite_delta[1][1],
2659            a+1, (1<<(2*a+1)) - s->no_rounding,
2660            s->h_edge_pos, s->v_edge_pos);
2661
2662     if(s->flags&CODEC_FLAG_GRAY) return;
2663
2664     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2665     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2666
2667     ptr = ref_picture[1];
2668     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2669            ox,
2670            oy,
2671            s->sprite_delta[0][0], s->sprite_delta[0][1],
2672            s->sprite_delta[1][0], s->sprite_delta[1][1],
2673            a+1, (1<<(2*a+1)) - s->no_rounding,
2674            s->h_edge_pos>>1, s->v_edge_pos>>1);
2675
2676     ptr = ref_picture[2];
2677     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2678            ox,
2679            oy,
2680            s->sprite_delta[0][0], s->sprite_delta[0][1],
2681            s->sprite_delta[1][0], s->sprite_delta[1][1],
2682            a+1, (1<<(2*a+1)) - s->no_rounding,
2683            s->h_edge_pos>>1, s->v_edge_pos>>1);
2684 }
2685
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples.
 * @param buf destination buffer
 * @param src source buffer, already offset to the (src_y, src_x) position
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int row, col;
    int top, left, bottom, right;  /* bounds of the valid region inside the block */

    /* if the block lies entirely outside the source, move it so that it
       still overlaps the source by one row/column; the replication below
       then fills everything else */
    if (src_y >= h) {
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    } else if (src_y <= -block_h) {
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    top    = -src_y > 0 ? -src_y : 0;
    left   = -src_x > 0 ? -src_x : 0;
    bottom = h - src_y < block_h ? h - src_y : block_h;
    right  = w - src_x < block_w ? w - src_x : block_w;

    /* copy the part of the block covered by the source */
    for (row = top; row < bottom; row++)
        for (col = left; col < right; col++)
            buf[col + row * linesize] = src[col + row * linesize];

    /* replicate the first valid row upwards */
    for (row = 0; row < top; row++)
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + top * linesize];

    /* replicate the last valid row downwards */
    for (row = bottom; row < block_h; row++)
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + (bottom - 1) * linesize];

    /* replicate the leftmost/rightmost valid column sideways */
    for (row = 0; row < block_h; row++) {
        for (col = 0; col < left; col++)
            buf[col + row * linesize] = buf[left + row * linesize];
        for (col = right; col < block_w; col++)
            buf[col + row * linesize] = buf[right - 1 + row * linesize];
    }
}
2756
/**
 * Half-pel motion compensation of one w x h block.
 *
 * @param dest destination, already offset to the block position
 * @param src top-left of the reference plane (offset is computed here)
 * @param field_based nonzero when a single field is addressed (doubles the
 *        effective height passed to the edge emulation)
 * @param field_select selects the bottom field of src when set
 * @param src_x x position of the block in the reference, in full pels
 * @param src_y y position of the block in the reference, in full pels
 * @param width/height clipping bounds for the source position
 * @param stride line size of src and dest
 * @param h_edge_pos/v_edge_pos valid picture area used for edge emulation
 * @param w/h block width and height
 * @param pix_op pixel-op table indexed by the half-pel sub-position dxy
 * @param motion_x/motion_y motion vector in half-pel units
 * @return 1 if edge emulation was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int dxy;
    int emu=0;

    /* low bits of the MV select one of the 4 half-pel sub-positions */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do no forget half pels */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    src += src_y * stride + src_x;

    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
            // NOTE(review): this passes the h_edge_pos *parameter* but the
            // s->v_edge_pos *field* (not the v_edge_pos parameter) -- looks
            // inconsistent with hpel_motion_lowres; confirm before changing
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src= s->edge_emu_buffer;
            emu=1;
        }
    }
    if(field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return emu;
}
2796
/**
 * Half-pel motion compensation of one block at reduced (lowres) resolution.
 * Fractional positions are handled with the bilinear h264 chroma MC
 * functions using the sx/sy weights.
 * @return 1 if edge emulation was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;  /* mask of the fractional MV bits at this scale */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the MV into integer position and fractional weights */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        // NOTE(review): passes s->linesize (not the stride parameter) to the
        // edge emulation -- confirm callers always use stride==s->linesize
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the fractional part to the fixed 1/8 scale of pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2838
/**
 * Applies one mpeg motion vector (half-pel units) to all three components.
 * The chroma vector/sub-position derivation depends on out_format and the
 * chroma subsampling: H.263 rounding, H.261 full-pel chroma, or plain
 * halving for MPEG-1/2 style 4:2:0 / 4:2:2 / 4:4:4.
 *
 * @param field_based nonzero for field motion (halves heights, doubles strides)
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param pix_op [luma/chroma][dxy] table of put or avg pixel functions
 * @param h height of the predicted area in lines
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel sub-position and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: derive chroma MV the mpeg way */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: halve the MV in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 MVs may not point outside the picture at all */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            /* read area leaves the padded picture: use copies with
               replicated borders for all three planes */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2955
/**
 * Applies one mpeg motion vector to all three components at reduced
 * (lowres) resolution; fractional positions are rendered with the
 * bilinear h264 chroma MC functions.
 *
 * @param field_based nonzero for field motion
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param h height of the predicted area in lines
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;               /* block size at this scale */
    const int s_mask= (2<<lowres)-1;            /* fractional MV bits at this scale */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* luma: fractional weights and integer source position */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        /* mpeg-style 4:2:0: halve the MV for chroma */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            /* read area leaves the padded picture: use copies with
               replicated borders */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the fractional parts to the fixed 1/8 scale of pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3052
//FIXME move to dsputil, avg variant, 16x16 version
/* Blends the five overlapping 8x8 predictions of one luma block (own MV
 * plus the four neighbour MVs) with a fixed OBMC weight pattern.  The five
 * weights of every sample sum to 8; +4 rounds and >>3 normalizes. */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    uint8_t * const pm    = src[0];  /* prediction from this block's own MV */
    uint8_t * const pt    = src[1];  /* from the top neighbour's MV         */
    uint8_t * const pl    = src[2];  /* from the left neighbour's MV        */
    uint8_t * const pr    = src[3];  /* from the right neighbour's MV       */
    uint8_t * const pb    = src[4];  /* from the bottom neighbour's MV      */
    int x;
#define BLEND1(x, t, l, m, r, b)\
    dst[x]= (t*pt[x] + l*pl[x] + m*pm[x] + r*pr[x] + b*pb[x] + 4)>>3
#define BLEND4(x, t, l, m, r, b)\
    BLEND1(x         , t, l, m, r, b);\
    BLEND1(x+1       , t, l, m, r, b);\
    BLEND1(x  +stride, t, l, m, r, b);\
    BLEND1(x+1+stride, t, l, m, r, b);

    x=0;
    BLEND1(x  , 2, 2, 4, 0, 0);
    BLEND1(x+1, 2, 1, 5, 0, 0);
    BLEND4(x+2, 2, 1, 5, 0, 0);
    BLEND4(x+4, 2, 0, 5, 1, 0);
    BLEND1(x+6, 2, 0, 5, 1, 0);
    BLEND1(x+7, 2, 0, 4, 2, 0);
    x+= stride;
    BLEND1(x  , 1, 2, 5, 0, 0);
    BLEND1(x+1, 1, 2, 5, 0, 0);
    BLEND1(x+6, 1, 0, 5, 2, 0);
    BLEND1(x+7, 1, 0, 5, 2, 0);
    x+= stride;
    BLEND4(x  , 1, 2, 5, 0, 0);
    BLEND4(x+2, 1, 1, 6, 0, 0);
    BLEND4(x+4, 1, 0, 6, 1, 0);
    BLEND4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    BLEND4(x  , 0, 2, 5, 0, 1);
    BLEND4(x+2, 0, 1, 6, 0, 1);
    BLEND4(x+4, 0, 0, 6, 1, 1);
    BLEND4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    BLEND1(x  , 0, 2, 5, 0, 1);
    BLEND1(x+1, 0, 2, 5, 0, 1);
    BLEND4(x+2, 0, 1, 5, 0, 2);
    BLEND4(x+4, 0, 0, 5, 1, 2);
    BLEND1(x+6, 0, 0, 5, 2, 1);
    BLEND1(x+7, 0, 0, 5, 2, 1);
    x+= stride;
    BLEND1(x  , 0, 2, 4, 0, 2);
    BLEND1(x+1, 0, 1, 5, 0, 2);
    BLEND1(x+6, 0, 0, 5, 1, 2);
    BLEND1(x+7, 0, 0, 4, 2, 2);
#undef BLEND1
#undef BLEND4
}
3104
3105 /* obmc for 1 8x8 luma block */
3106 static inline void obmc_motion(MpegEncContext *s,
3107                                uint8_t *dest, uint8_t *src,
3108                                int src_x, int src_y,
3109                                op_pixels_func *pix_op,
3110                                int16_t mv[5][2]/* mid top left right bottom*/)
3111 #define MID    0
3112 {
3113     int i;
3114     uint8_t *ptr[5];
3115
3116     assert(s->quarter_sample==0);
3117
3118     for(i=0; i<5; i++){
3119         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3120             ptr[i]= ptr[MID];
3121         }else{
3122             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3123             hpel_motion(s, ptr[i], src, 0, 0,
3124                         src_x, src_y,
3125                         s->width, s->height, s->linesize,
3126                         s->h_edge_pos, s->v_edge_pos,
3127                         8, 8, pix_op,
3128                         mv[i][0], mv[i][1]);
3129         }
3130     }
3131
3132     put_obmc(dest, ptr, s->linesize);
3133 }
3134
/**
 * Quarter-pel motion compensation of one macroblock (or one field half).
 * Luma is compensated with qpix_op at quarter-pel precision; chroma is
 * compensated with pix_op at half-pel precision after the luma vector is
 * rounded down (optionally reproducing known encoder bugs).
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* quarter-pel fraction of the vector selects one of the 16 qpel filters */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    /* in field mode lines of one parity are interleaved: double the strides */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma vector from the luma one */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        /* reproduce the rounding of a known buggy encoder */
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* halve again; the lost low bit is ORed back in so a nonzero
       fraction is never rounded away completely */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    /* half-pel fraction selects one of the 4 hpel chroma filters */
    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the block reaches outside the padded reference, build an
       edge-emulated copy in edge_emu_buffer and read from that instead */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* two 8-wide calls cover the 16-wide field macroblock */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3221
3222 inline int ff_h263_round_chroma(int x){
3223     if (x >= 0)
3224         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3225     else {
3226         x = -x;
3227         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3228     }
3229 }
3230
3231 /**
3232  * h263 chorma 4mv motion compensation.
3233  */
3234 static inline void chroma_4mv_motion(MpegEncContext *s,
3235                                      uint8_t *dest_cb, uint8_t *dest_cr,
3236                                      uint8_t **ref_picture,
3237                                      op_pixels_func *pix_op,
3238                                      int mx, int my){
3239     int dxy, emu=0, src_x, src_y, offset;
3240     uint8_t *ptr;
3241
3242     /* In case of 8X8, we construct a single chroma motion vector
3243        with a special rounding */
3244     mx= ff_h263_round_chroma(mx);
3245     my= ff_h263_round_chroma(my);
3246
3247     dxy = ((my & 1) << 1) | (mx & 1);
3248     mx >>= 1;
3249     my >>= 1;
3250
3251     src_x = s->mb_x * 8 + mx;
3252     src_y = s->mb_y * 8 + my;
3253     src_x = clip(src_x, -8, s->width/2);
3254     if (src_x == s->width/2)
3255         dxy &= ~1;
3256     src_y = clip(src_y, -8, s->height/2);
3257     if (src_y == s->height/2)
3258         dxy &= ~2;
3259
3260     offset = (src_y * (s->uvlinesize)) + src_x;
3261     ptr = ref_picture[1] + offset;
3262     if(s->flags&CODEC_FLAG_EMU_EDGE){
3263         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3264            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3265             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3266             ptr= s->edge_emu_buffer;
3267             emu=1;
3268         }
3269     }
3270     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3271
3272     ptr = ref_picture[2] + offset;
3273     if(emu){
3274         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3275         ptr= s->edge_emu_buffer;
3276     }
3277     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3278 }
3279
/**
 * h263 chroma 4mv motion compensation, lowres (downscaled) variant.
 * mx/my are the sums of the four luma block vectors; they are reduced to
 * one chroma vector with the special h263 rounding, then both chroma
 * planes are compensated with pix_op.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       /* chroma block size at this downscale */
    const int s_mask= (2<<lowres)-1;    /* sub-pel fraction mask */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    /* quarter-pel vectors are halved before the chroma rounding */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            /* source reaches outside the padded reference: emulate the edge */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* scale the sub-pel fraction to the precision pix_op expects
       — NOTE(review): inferred from the <<(2-lowres); confirm in dsputil */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* cr needs edge emulation exactly when cb did (same position) */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3329
3330 /**
3331  * motion compensation of a single macroblock
3332  * @param s context
3333  * @param dest_y luma destination pointer
3334  * @param dest_cb chroma cb/u destination pointer
3335  * @param dest_cr chroma cr/v destination pointer
3336  * @param dir direction (0->forward, 1->backward)
3337  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3338  * @param pic_op halfpel motion compensation function (average or put normally)
3339  * @param pic_op qpel motion compensation function (average or put normally)
3340  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3341  */
3342 static inline void MPV_motion(MpegEncContext *s,
3343                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3344                               int dir, uint8_t **ref_picture,
3345                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3346 {
3347     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3348     int mb_x, mb_y, i;
3349     uint8_t *ptr, *dest;
3350
3351     mb_x = s->mb_x;
3352     mb_y = s->mb_y;
3353
3354     if(s->obmc && s->pict_type != B_TYPE){
3355         int16_t mv_cache[4][4][2];
3356         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3357         const int mot_stride= s->b8_stride;
3358         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3359
3360         assert(!s->mb_skipped);
3361
3362         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3363         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3364         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3365
3366         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3367             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3368         }else{
3369             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3370         }
3371
3372         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3373             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3374             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3375         }else{
3376             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3377             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3378         }
3379
3380         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3381             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3382             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3383         }else{
3384             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3385             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3386         }
3387
3388         mx = 0;
3389         my = 0;
3390         for(i=0;i<4;i++) {
3391             const int x= (i&1)+1;
3392             const int y= (i>>1)+1;
3393             int16_t mv[5][2]= {
3394                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3395                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3396                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3397                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3398                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3399             //FIXME cleanup
3400             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3401                         ref_picture[0],
3402                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3403                         pix_op[1],
3404                         mv);
3405
3406             mx += mv[0][0];
3407             my += mv[0][1];
3408         }
3409         if(!(s->flags&CODEC_FLAG_GRAY))
3410             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3411
3412         return;
3413     }
3414
3415     switch(s->mv_type) {
3416     case MV_TYPE_16X16:
3417         if(s->mcsel){
3418             if(s->real_sprite_warping_points==1){
3419                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3420                             ref_picture);
3421             }else{
3422                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3423                             ref_picture);
3424             }
3425         }else if(s->quarter_sample){
3426             qpel_motion(s, dest_y, dest_cb, dest_cr,
3427                         0, 0, 0,
3428                         ref_picture, pix_op, qpix_op,
3429                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3430         }else if(s->mspel){
3431             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3432                         ref_picture, pix_op,
3433                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3434         }else
3435         {
3436             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3437                         0, 0, 0,
3438                         ref_picture, pix_op,
3439                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3440         }
3441         break;
3442     case MV_TYPE_8X8:
3443         mx = 0;
3444         my = 0;
3445         if(s->quarter_sample){
3446             for(i=0;i<4;i++) {
3447                 motion_x = s->mv[dir][i][0];
3448                 motion_y = s->mv[dir][i][1];
3449
3450                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3451                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3452                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3453
3454                 /* WARNING: do no forget half pels */
3455                 src_x = clip(src_x, -16, s->width);
3456                 if (src_x == s->width)
3457                     dxy &= ~3;
3458                 src_y = clip(src_y, -16, s->height);
3459                 if (src_y == s->height)
3460                     dxy &= ~12;
3461
3462                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3463                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3464                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3465                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3466                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3467                         ptr= s->edge_emu_buffer;
3468                     }
3469                 }
3470                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3471                 qpix_op[1][dxy](dest, ptr, s->linesize);
3472
3473                 mx += s->mv[dir][i][0]/2;
3474                 my += s->mv[dir][i][1]/2;
3475             }
3476         }else{
3477             for(i=0;i<4;i++) {
3478                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3479                             ref_picture[0], 0, 0,
3480                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3481                             s->width, s->height, s->linesize,
3482                             s->h_edge_pos, s->v_edge_pos,
3483                             8, 8, pix_op[1],
3484                             s->mv[dir][i][0], s->mv[dir][i][1]);
3485
3486                 mx += s->mv[dir][i][0];
3487                 my += s->mv[dir][i][1];
3488             }
3489         }
3490
3491         if(!(s->flags&CODEC_FLAG_GRAY))
3492             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3493         break;
3494     case MV_TYPE_FIELD:
3495         if (s->picture_structure == PICT_FRAME) {
3496             if(s->quarter_sample){
3497                 for(i=0; i<2; i++){
3498                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3499                                 1, i, s->field_select[dir][i],
3500                                 ref_picture, pix_op, qpix_op,
3501                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3502                 }
3503             }else{
3504                 /* top field */
3505                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3506                             1, 0, s->field_select[dir][0],
3507                             ref_picture, pix_op,
3508                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3509                 /* bottom field */
3510                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3511                             1, 1, s->field_select[dir][1],
3512                             ref_picture, pix_op,
3513                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3514             }
3515         } else {
3516             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3517                 ref_picture= s->current_picture_ptr->data;
3518             }
3519
3520             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3521                         0, 0, s->field_select[dir][0],
3522                         ref_picture, pix_op,
3523                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3524         }
3525         break;
3526     case MV_TYPE_16X8:
3527         for(i=0; i<2; i++){
3528             uint8_t ** ref2picture;
3529
3530             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3531                 ref2picture= ref_picture;
3532             }else{
3533                 ref2picture= s->current_picture_ptr->data;
3534             }
3535
3536             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3537                         0, 0, s->field_select[dir][i],
3538                         ref2picture, pix_op,
3539                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3540
3541             dest_y += 16*s->linesize;
3542             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3543             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3544         }
3545         break;
3546     case MV_TYPE_DMV:
3547         if(s->picture_structure == PICT_FRAME){
3548             for(i=0; i<2; i++){
3549                 int j;
3550                 for(j=0; j<2; j++){
3551                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3552                                 1, j, j^i,
3553                                 ref_picture, pix_op,
3554                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3555                 }
3556                 pix_op = s->dsp.avg_pixels_tab;
3557             }
3558         }else{
3559             for(i=0; i<2; i++){
3560                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3561                             0, 0, s->picture_structure != i+1,
3562                             ref_picture, pix_op,
3563                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3564
3565                 // after put we make avg of the same block
3566                 pix_op=s->dsp.avg_pixels_tab;
3567
3568                 //opposite parity is always in the same frame if this is second field
3569                 if(!s->first_field){
3570                     ref_picture = s->current_picture_ptr->data;
3571                 }
3572             }
3573         }
3574     break;
3575     default: assert(0);
3576     }
3577 }
3578
3579 /**
3580  * motion compensation of a single macroblock
3581  * @param s context
3582  * @param dest_y luma destination pointer
3583  * @param dest_cb chroma cb/u destination pointer
3584  * @param dest_cr chroma cr/v destination pointer
3585  * @param dir direction (0->forward, 1->backward)
3586  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3587  * @param pic_op halfpel motion compensation function (average or put normally)
3588  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3589  */
3590 static inline void MPV_motion_lowres(MpegEncContext *s,
3591                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3592                               int dir, uint8_t **ref_picture,
3593                               h264_chroma_mc_func *pix_op)
3594 {
3595     int mx, my;
3596     int mb_x, mb_y, i;
3597     const int lowres= s->avctx->lowres;
3598     const int block_s= 8>>lowres;
3599
3600     mb_x = s->mb_x;
3601     mb_y = s->mb_y;
3602
3603     switch(s->mv_type) {
3604     case MV_TYPE_16X16:
3605         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3606                     0, 0, 0,
3607                     ref_picture, pix_op,
3608                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3609         break;
3610     case MV_TYPE_8X8:
3611         mx = 0;
3612         my = 0;
3613             for(i=0;i<4;i++) {
3614                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3615                             ref_picture[0], 0, 0,
3616                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3617                             s->width, s->height, s->linesize,
3618                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3619                             block_s, block_s, pix_op,
3620                             s->mv[dir][i][0], s->mv[dir][i][1]);
3621
3622                 mx += s->mv[dir][i][0];
3623                 my += s->mv[dir][i][1];
3624             }
3625
3626         if(!(s->flags&CODEC_FLAG_GRAY))
3627             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3628         break;
3629     case MV_TYPE_FIELD:
3630         if (s->picture_structure == PICT_FRAME) {
3631             /* top field */
3632             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3633                         1, 0, s->field_select[dir][0],
3634                         ref_picture, pix_op,
3635                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3636             /* bottom field */
3637             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3638                         1, 1, s->field_select[dir][1],
3639                         ref_picture, pix_op,
3640                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3641         } else {
3642             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3643                 ref_picture= s->current_picture_ptr->data;
3644             }
3645
3646             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3647                         0, 0, s->field_select[dir][0],
3648                         ref_picture, pix_op,
3649                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3650         }
3651         break;
3652     case MV_TYPE_16X8:
3653         for(i=0; i<2; i++){
3654             uint8_t ** ref2picture;
3655
3656             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3657                 ref2picture= ref_picture;
3658             }else{
3659                 ref2picture= s->current_picture_ptr->data;
3660             }
3661
3662             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3663                         0, 0, s->field_select[dir][i],
3664                         ref2picture, pix_op,
3665                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3666
3667             dest_y += 2*block_s*s->linesize;
3668             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3669             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3670         }
3671         break;
3672     case MV_TYPE_DMV:
3673         if(s->picture_structure == PICT_FRAME){
3674             for(i=0; i<2; i++){
3675                 int j;
3676                 for(j=0; j<2; j++){
3677                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3678                                 1, j, j^i,
3679                                 ref_picture, pix_op,
3680                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3681                 }
3682                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3683             }
3684         }else{
3685             for(i=0; i<2; i++){
3686                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3687                             0, 0, s->picture_structure != i+1,
3688                             ref_picture, pix_op,
3689                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3690
3691                 // after put we make avg of the same block
3692                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3693
3694                 //opposite parity is always in the same frame if this is second field
3695                 if(!s->first_field){
3696                     ref_picture = s->current_picture_ptr->data;
3697                 }
3698             }
3699         }
3700     break;
3701     default: assert(0);
3702     }
3703 }
3704
/* dequantize block[] as intra (quantizer qscale) and put its idct to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3712
3713 /* add block[] to dest[] */
3714 static inline void add_dct(MpegEncContext *s,
3715                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3716 {
3717     if (s->block_last_index[i] >= 0) {
3718         s->dsp.idct_add (dest, line_size, block);
3719     }
3720 }
3721
3722 static inline void add_dequant_dct(MpegEncContext *s,
3723                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3724 {
3725     if (s->block_last_index[i] >= 0) {
3726         s->dct_unquantize_inter(s, block, i, qscale);
3727
3728         s->dsp.idct_add (dest, line_size, block);
3729     }
3730 }
3731
3732 /**
3733  * cleans dc, ac, coded_block for the current non intra MB
3734  */
3735 void ff_clean_intra_table_entries(MpegEncContext *s)
3736 {
3737     int wrap = s->b8_stride;
3738     int xy = s->block_index[0];
3739
3740     s->dc_val[0][xy           ] =
3741     s->dc_val[0][xy + 1       ] =
3742     s->dc_val[0][xy     + wrap] =
3743     s->dc_val[0][xy + 1 + wrap] = 1024;
3744     /* ac pred */
3745     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3746     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3747     if (s->msmpeg4_version>=3) {
3748         s->coded_block[xy           ] =
3749         s->coded_block[xy + 1       ] =
3750         s->coded_block[xy     + wrap] =
3751         s->coded_block[xy + 1 + wrap] = 0;
3752     }
3753     /* chroma */
3754     wrap = s->mb_stride;
3755     xy = s->mb_x + s->mb_y * wrap;
3756     s->dc_val[1][xy] =
3757     s->dc_val[2][xy] = 1024;
3758     /* ac pred */
3759     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3760     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3761
3762     s->mbintra_table[xy]= 0;
3763 }
3764
3765 /* generic function called after a macroblock has been parsed by the
3766    decoder or after it has been encoded by the encoder.
3767
3768    Important variables used:
3769    s->mb_intra : true if intra macroblock
3770    s->mv_dir   : motion vector direction
3771    s->mv_type  : motion vector type
3772    s->mv       : motion vector
3773    s->interlaced_dct : true if interlaced dct used (mpeg2)
3774  */
3775 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3776 {
3777     int mb_x, mb_y;
3778     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3779 #ifdef HAVE_XVMC
3780     if(s->avctx->xvmc_acceleration){
3781         XVMC_decode_mb(s);//xvmc uses pblocks
3782         return;
3783     }
3784 #endif
3785
3786     mb_x = s->mb_x;
3787     mb_y = s->mb_y;
3788
3789     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3790        /* save DCT coefficients */
3791        int i,j;
3792        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3793        for(i=0; i<6; i++)
3794            for(j=0; j<64; j++)
3795                *dct++ = block[i][s->dsp.idct_permutation[j]];
3796     }
3797
3798     s->current_picture.qscale_table[mb_xy]= s->qscale;
3799
3800     /* update DC predictors for P macroblocks */
3801     if (!s->mb_intra) {
3802         if (s->h263_pred || s->h263_aic) {
3803             if(s->mbintra_table[mb_xy])
3804                 ff_clean_intra_table_entries(s);
3805         } else {
3806             s->last_dc[0] =
3807             s->last_dc[1] =
3808             s->last_dc[2] = 128 << s->intra_dc_precision;
3809         }
3810     }
3811     else if (s->h263_pred || s->h263_aic)
3812         s->mbintra_table[mb_xy]=1;
3813
3814     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3815         uint8_t *dest_y, *dest_cb, *dest_cr;
3816         int dct_linesize, dct_offset;
3817         op_pixels_func (*op_pix)[4];
3818         qpel_mc_func (*op_qpix)[16];
3819         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3820         const int uvlinesize= s->current_picture.linesize[1];
3821         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3822         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3823
3824         /* avoid copy if macroblock skipped in last frame too */
3825         /* skip only during decoding as we might trash the buffers during encoding a bit */
3826         if(!s->encoding){
3827             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3828             const int age= s->current_picture.age;
3829
3830             assert(age);
3831
3832             if (s->mb_skipped) {
3833                 s->mb_skipped= 0;
3834                 assert(s->pict_type!=I_TYPE);
3835
3836                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3837                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3838
3839                 /* if previous was skipped too, then nothing to do !  */
3840                 if (*mbskip_ptr >= age && s->current_picture.reference){
3841                     return;
3842                 }
3843             } else if(!s->current_picture.reference){
3844                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3845                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3846             } else{
3847                 *mbskip_ptr = 0; /* not skipped */
3848             }
3849         }
3850
3851         dct_linesize = linesize << s->interlaced_dct;
3852         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3853
3854         if(readable){
3855             dest_y=  s->dest[0];
3856             dest_cb= s->dest[1];
3857             dest_cr= s->dest[2];
3858         }else{
3859             dest_y = s->b_scratchpad;
3860             dest_cb= s->b_scratchpad+16*linesize;
3861             dest_cr= s->b_scratchpad+32*linesize;
3862         }
3863
3864         if (!s->mb_intra) {
3865             /* motion handling */
3866             /* decoding or more than one mb_type (MC was already done otherwise) */
3867             if(!s->encoding){
3868                 if(lowres_flag){
3869                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3870
3871                     if (s->mv_dir & MV_DIR_FORWARD) {
3872                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3873                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3874                     }
3875                     if (s->mv_dir & MV_DIR_BACKWARD) {
3876                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3877                     }
3878                 }else{
3879                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3880                         op_pix = s->dsp.put_pixels_tab;
3881                         op_qpix= s->dsp.put_qpel_pixels_tab;
3882                     }else{
3883                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3884                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3885                     }
3886                     if (s->mv_dir & MV_DIR_FORWARD) {
3887                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3888                         op_pix = s->dsp.avg_pixels_tab;
3889                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3890                     }
3891                     if (s->mv_dir & MV_DIR_BACKWARD) {
3892                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3893                     }
3894                 }
3895             }
3896
3897             /* skip dequant / idct if we are really late ;) */
3898             if(s->hurry_up>1) goto skip_idct;
3899             if(s->avctx->skip_idct){
3900                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3901                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3902                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3903                     goto skip_idct;
3904             }
3905
3906             /* add dct residue */
3907             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3908                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3909                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3910                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3911                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3912                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3913
3914                 if(!(s->flags&CODEC_FLAG_GRAY)){
3915                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3916                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3917                 }
3918             } else if(s->codec_id != CODEC_ID_WMV2){
3919                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3920                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3921                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3922                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3923
3924                 if(!(s->flags&CODEC_FLAG_GRAY)){
3925                     if(s->chroma_y_shift){//Chroma420
3926                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3927                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3928                     }else{
3929                         //chroma422
3930                         dct_linesize = uvlinesize << s->interlaced_dct;
3931                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3932
3933                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3934                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3935                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3936                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3937                         if(!s->chroma_x_shift){//Chroma444
3938                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3939                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3940                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3941                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3942                         }
3943                     }
3944                 }//fi gray
3945             }
3946             else{
3947                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3948             }
3949         } else {
3950             /* dct only in intra block */
3951             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3952                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3953                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3954                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3955                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3956
3957                 if(!(s->flags&CODEC_FLAG_GRAY)){
3958                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3959                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3960                 }
3961             }else{
3962                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3963                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3964                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3965                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3966
3967                 if(!(s->flags&CODEC_FLAG_GRAY)){
3968                     if(s->chroma_y_shift){
3969                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3970                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3971                     }else{
3972
3973                         dct_linesize = uvlinesize << s->interlaced_dct;
3974                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3975
3976                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3977                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3978                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3979                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3980                         if(!s->chroma_x_shift){//Chroma444
3981                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3982                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3983                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3984                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3985                         }
3986                     }
3987                 }//gray
3988             }
3989         }
3990 skip_idct:
3991         if(!readable){
3992             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3993             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3994             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3995         }
3996     }
3997 }
3998
3999 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4000     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4001     else                  MPV_decode_mb_internal(s, block, 0);
4002 }
4003
4004 #ifdef CONFIG_ENCODERS
4005
4006 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4007 {
4008     static const char tab[64]=
4009         {3,2,2,1,1,1,1,1,
4010          1,1,1,1,1,1,1,1,
4011          1,1,1,1,1,1,1,1,
4012          0,0,0,0,0,0,0,0,
4013          0,0,0,0,0,0,0,0,
4014          0,0,0,0,0,0,0,0,
4015          0,0,0,0,0,0,0,0,
4016          0,0,0,0,0,0,0,0};
4017     int score=0;
4018     int run=0;
4019     int i;
4020     DCTELEM *block= s->block[n];
4021     const int last_index= s->block_last_index[n];
4022     int skip_dc;
4023
4024     if(threshold<0){
4025         skip_dc=0;
4026         threshold= -threshold;
4027     }else
4028         skip_dc=1;
4029
4030     /* are all which we could set to zero are allready zero? */
4031     if(last_index<=skip_dc - 1) return;
4032
4033     for(i=0; i<=last_index; i++){
4034         const int j = s->intra_scantable.permutated[i];
4035         const int level = ABS(block[j]);
4036         if(level==1){
4037             if(skip_dc && i==0) continue;
4038             score+= tab[run];
4039             run=0;
4040         }else if(level>1){
4041             return;
4042         }else{
4043             run++;
4044         }
4045     }
4046     if(score >= threshold) return;
4047     for(i=skip_dc; i<=last_index; i++){
4048         const int j = s->intra_scantable.permutated[i];
4049         block[j]=0;
4050     }
4051     if(block[0]) s->block_last_index[n]= 0;
4052     else         s->block_last_index[n]= -1;
4053 }
4054
4055 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4056 {
4057     int i;
4058     const int maxlevel= s->max_qcoeff;
4059     const int minlevel= s->min_qcoeff;
4060     int overflow=0;
4061
4062     if(s->mb_intra){
4063         i=1; //skip clipping of intra dc
4064     }else
4065         i=0;
4066
4067     for(;i<=last_index; i++){
4068         const int j= s->intra_scantable.permutated[i];
4069         int level = block[j];
4070
4071         if     (level>maxlevel){
4072             level=maxlevel;
4073             overflow++;
4074         }else if(level<minlevel){
4075             level=minlevel;
4076             overflow++;
4077         }
4078
4079         block[j]= level;
4080     }
4081
4082     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4083         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4084 }
4085
4086 #endif //CONFIG_ENCODERS
4087
4088 /**
4089  *
4090  * @param h is the normal height, this will be reduced automatically if needed for the last row
4091  */
4092 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4093     if (s->avctx->draw_horiz_band) {
4094         AVFrame *src;
4095         int offset[4];
4096
4097         if(s->picture_structure != PICT_FRAME){
4098             h <<= 1;
4099             y <<= 1;
4100             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4101         }
4102
4103         h= FFMIN(h, s->avctx->height - y);
4104
4105         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4106             src= (AVFrame*)s->current_picture_ptr;
4107         else if(s->last_picture_ptr)
4108             src= (AVFrame*)s->last_picture_ptr;
4109         else
4110             return;
4111
4112         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4113             offset[0]=
4114             offset[1]=
4115             offset[2]=
4116             offset[3]= 0;
4117         }else{
4118             offset[0]= y * s->linesize;;
4119             offset[1]=
4120             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4121             offset[3]= 0;
4122         }
4123
4124         emms_c();
4125
4126         s->avctx->draw_horiz_band(s->avctx, src, offset,
4127                                   y, s->picture_structure, h);
4128     }
4129 }
4130
/**
 * Set up s->block_index[] and the s->dest[] pointers for the macroblock
 * at (s->mb_x, s->mb_y).
 * block_index[0..3] address the four luma 8x8 blocks (in b8_stride units),
 * block_index[4..5] the two chroma blocks; all carry a -1/-2 bias to the
 * left of the current macroblock.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres;  // log2 of the macroblock size in pixels (16 at full resolution)

    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma blocks live after the luma plane of indices, hence the
       s->b8_stride*s->mb_height*2 offset */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    /* destination pointers start one macroblock to the left, matching the
       bias of block_index above */
    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    /* NOTE(review): for B frames with draw_horiz_band on frame pictures the
       row offset is presumably applied elsewhere — confirm against callers */
    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4155
4156 #ifdef CONFIG_ENCODERS
4157
4158 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4159     int x, y;
4160 //FIXME optimize
4161     for(y=0; y<8; y++){
4162         for(x=0; x<8; x++){
4163             int x2, y2;
4164             int sum=0;
4165             int sqr=0;
4166             int count=0;
4167
4168             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4169                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4170                     int v= ptr[x2 + y2*stride];
4171                     sum += v;
4172                     sqr += v*v;
4173                     count++;
4174                 }
4175             }
4176             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4177         }
4178     }
4179 }
4180
/**
 * Encode one macroblock: select the quantizer (adaptive quant / QP RD),
 * fetch the source pixels (with edge emulation for partial macroblocks),
 * run motion compensation for inter MBs, transform and quantize the six
 * 8x8 blocks (4 luma + 2 chroma), apply coefficient elimination, and hand
 * the result to the codec-specific entropy coder.
 * @param motion_x horizontal motion vector component, forwarded to the entropy coder
 * @param motion_y vertical motion vector component, forwarded to the entropy coder
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];      // perceptual weights for noise shaping
    DCTELEM orig[6][64];        // pre-quantization copy for dct_quantize_refine
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    /* per-macroblock quantizer selection */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* B frames can only signal even dquant, and
                               direct-mode MBs cannot change the quantizer */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 4MV macroblocks cannot change the quantizer */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixels of this macroblock in the picture being coded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* macroblock partially outside the picture: replicate edge pixels
       into the emulation buffer and encode from there */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* frame/field DCT decision by comparing interlace metrics of the
           progressive and interleaved row orderings */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    /* switch luma access to field (every other line) order */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        /* no-rounding MC is used for P frames only (when enabled) */
        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* bidirectional prediction: the backward pass averages on top of
           the forward prediction */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* frame/field DCT decision on the residual (source vs prediction) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* residual = source - motion-compensated prediction */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* cheap SAD test: blocks whose residual would quantize to nothing
           anyway are skipped entirely */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* keep weights and original coefficients for the noise-shaping refinement */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop blocks whose sparse +-1 coefficients are not worth coding */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* grayscale intra MBs still need a neutral chroma DC coefficient */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4442
4443 #endif //CONFIG_ENCODERS
4444
4445 void ff_mpeg_flush(AVCodecContext *avctx){
4446     int i;
4447     MpegEncContext *s = avctx->priv_data;
4448
4449     if(s==NULL || s->picture==NULL)
4450         return;
4451
4452     for(i=0; i<MAX_PICTURE_COUNT; i++){
4453        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4454                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4455         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4456     }
4457     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4458
4459     s->mb_x= s->mb_y= 0;
4460
4461     s->parse_context.state= -1;
4462     s->parse_context.frame_start_found= 0;
4463     s->parse_context.overread= 0;
4464     s->parse_context.overread_index= 0;
4465     s->parse_context.index= 0;
4466     s->parse_context.last_index= 0;
4467     s->bitstream_buffer_size=0;
4468 }
4469
4470 #ifdef CONFIG_ENCODERS
4471 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4472 {
4473     const uint16_t *srcw= (uint16_t*)src;
4474     int words= length>>4;
4475     int bits= length&15;
4476     int i;
4477
4478     if(length==0) return;
4479
4480     if(words < 16){
4481         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4482     }else if(put_bits_count(pb)&7){
4483         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4484     }else{
4485         for(i=0; put_bits_count(pb)&31; i++)
4486             put_bits(pb, 8, src[i]);
4487         flush_put_bits(pb);
4488         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4489         skip_put_bytes(pb, 2*words-i);
4490     }
4491
4492     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4493 }
4494
4495 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4496     int i;
4497
4498     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4499
4500     /* mpeg1 */
4501     d->mb_skip_run= s->mb_skip_run;
4502     for(i=0; i<3; i++)
4503         d->last_dc[i]= s->last_dc[i];
4504
4505     /* statistics */
4506     d->mv_bits= s->mv_bits;
4507     d->i_tex_bits= s->i_tex_bits;
4508     d->p_tex_bits= s->p_tex_bits;
4509     d->i_count= s->i_count;
4510     d->f_count= s->f_count;
4511     d->b_count= s->b_count;
4512     d->skip_count= s->skip_count;
4513     d->misc_bits= s->misc_bits;
4514     d->last_bits= 0;
4515
4516     d->mb_skipped= 0;
4517     d->qscale= s->qscale;
4518     d->dquant= s->dquant;
4519 }
4520
4521 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4522     int i;
4523
4524     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4525     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4526
4527     /* mpeg1 */
4528     d->mb_skip_run= s->mb_skip_run;
4529     for(i=0; i<3; i++)
4530         d->last_dc[i]= s->last_dc[i];
4531
4532     /* statistics */
4533     d->mv_bits= s->mv_bits;
4534     d->i_tex_bits= s->i_tex_bits;
4535     d->p_tex_bits= s->p_tex_bits;
4536     d->i_count= s->i_count;
4537     d->f_count= s->f_count;
4538     d->b_count= s->b_count;
4539     d->skip_count= s->skip_count;
4540     d->misc_bits= s->misc_bits;
4541
4542     d->mb_intra= s->mb_intra;
4543     d->mb_skipped= s->mb_skipped;
4544     d->mv_type= s->mv_type;
4545     d->mv_dir= s->mv_dir;
4546     d->pb= s->pb;
4547     if(s->data_partitioning){
4548         d->pb2= s->pb2;
4549         d->tex_pb= s->tex_pb;
4550     }
4551     d->block= s->block;
4552     for(i=0; i<6; i++)
4553         d->block_last_index[i]= s->block_last_index[i];
4554     d->interlaced_dct= s->interlaced_dct;
4555     d->qscale= s->qscale;
4556 }
4557
/**
 * Encode one candidate coding mode of the current macroblock and keep it
 * if it beats the best score so far.
 * Two bitstream/block buffers (index *next_block) are ping-ponged so the
 * current best candidate is never overwritten.
 * @param backup context state to restore before encoding this candidate
 * @param best   receives the context state of the winning candidate
 * @param dmin   in/out: best (lowest) score so far
 * @param next_block in/out: which of the two candidate buffers to write next
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    copy_context_before_encode(s, backup, type);

    /* direct output into the candidate buffer pair */
    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    /* the second candidate must not reconstruct over the first one's
       pixels, so redirect reconstruction into the scratchpad */
    if(*next_block){
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* base score: number of bits spent on this candidate */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    /* full rate-distortion mode: reconstruct and add the distortion term */
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;   /* keep this candidate; write the next one elsewhere */

        copy_context_after_encode(best, s, type);
    }
}
4608
4609 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4610     uint32_t *sq = squareTbl + 256;
4611     int acc=0;
4612     int x,y;
4613
4614     if(w==16 && h==16)
4615         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4616     else if(w==8 && h==8)
4617         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4618
4619     for(y=0; y<h; y++){
4620         for(x=0; x<w; x++){
4621             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4622         }
4623     }
4624
4625     assert(acc>=0);
4626
4627     return acc;
4628 }
4629
4630 static int sse_mb(MpegEncContext *s){
4631     int w= 16;
4632     int h= 16;
4633
4634     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4635     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4636
4637     if(w==16 && h==16)
4638       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4639         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4640                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4641                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4642       }else{
4643         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4644                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4645                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4646       }
4647     else
4648         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4649                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4650                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4651 }
4652
4653 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4654     MpegEncContext *s= arg;
4655
4656
4657     s->me.pre_pass=1;
4658     s->me.dia_size= s->avctx->pre_dia_size;
4659     s->first_slice_line=1;
4660     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4661         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4662             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4663         }
4664         s->first_slice_line=0;
4665     }
4666
4667     s->me.pre_pass=0;
4668
4669     return 0;
4670 }
4671
4672 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4673     MpegEncContext *s= arg;
4674
4675     s->me.dia_size= s->avctx->dia_size;
4676     s->first_slice_line=1;
4677     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4678         s->mb_x=0; //for block init below
4679         ff_init_block_index(s);
4680         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4681             s->block_index[0]+=2;
4682             s->block_index[1]+=2;
4683             s->block_index[2]+=2;
4684             s->block_index[3]+=2;
4685
4686             /* compute motion vector & mb_type and store in context */
4687             if(s->pict_type==B_TYPE)
4688                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4689             else
4690                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4691         }
4692         s->first_slice_line=0;
4693     }
4694     return 0;
4695 }
4696
4697 static int mb_var_thread(AVCodecContext *c, void *arg){
4698     MpegEncContext *s= arg;
4699     int mb_x, mb_y;
4700
4701     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4702         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4703             int xx = mb_x * 16;
4704             int yy = mb_y * 16;
4705             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4706             int varc;
4707             int sum = s->dsp.pix_sum(pix, s->linesize);
4708
4709             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4710
4711             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4712             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4713             s->me.mb_var_sum_temp    += varc;
4714         }
4715     }
4716     return 0;
4717 }
4718
4719 static void write_slice_end(MpegEncContext *s){
4720     if(s->codec_id==CODEC_ID_MPEG4){
4721         if(s->partitioned_frame){
4722             ff_mpeg4_merge_partitions(s);
4723         }
4724
4725         ff_mpeg4_stuffing(&s->pb);
4726     }else if(s->out_format == FMT_MJPEG){
4727         ff_mjpeg_stuffing(&s->pb);
4728     }
4729
4730     align_put_bits(&s->pb);
4731     flush_put_bits(&s->pb);
4732
4733     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4734         s->misc_bits+= get_bits_diff(s);
4735 }
4736
4737 static int encode_thread(AVCodecContext *c, void *arg){
4738     MpegEncContext *s= arg;
4739     int mb_x, mb_y, pdif = 0;
4740     int i, j;
4741     MpegEncContext best_s, backup_s;
4742     uint8_t bit_buf[2][MAX_MB_BYTES];
4743     uint8_t bit_buf2[2][MAX_MB_BYTES];
4744     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4745     PutBitContext pb[2], pb2[2], tex_pb[2];
4746 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4747
4748     for(i=0; i<2; i++){
4749         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4750         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4751         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4752     }
4753
4754     s->last_bits= put_bits_count(&s->pb);
4755     s->mv_bits=0;
4756     s->misc_bits=0;
4757     s->i_tex_bits=0;
4758     s->p_tex_bits=0;
4759     s->i_count=0;
4760     s->f_count=0;
4761     s->b_count=0;
4762     s->skip_count=0;
4763
4764     for(i=0; i<3; i++){
4765         /* init last dc values */
4766         /* note: quant matrix value (8) is implied here */
4767         s->last_dc[i] = 128 << s->intra_dc_precision;
4768
4769         s->current_picture.error[i] = 0;
4770     }
4771     s->mb_skip_run = 0;
4772     memset(s->last_mv, 0, sizeof(s->last_mv));
4773
4774     s->last_mv_dir = 0;
4775
4776     switch(s->codec_id){
4777     case CODEC_ID_H263:
4778     case CODEC_ID_H263P:
4779     case CODEC_ID_FLV1:
4780         s->gob_index = ff_h263_get_gob_height(s);
4781         break;
4782     case CODEC_ID_MPEG4:
4783         if(s->partitioned_frame)
4784             ff_mpeg4_init_partitions(s);
4785         break;
4786     }
4787
4788     s->resync_mb_x=0;
4789     s->resync_mb_y=0;
4790     s->first_slice_line = 1;
4791     s->ptr_lastgob = s->pb.buf;
4792     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4793 //    printf("row %d at %X\n", s->mb_y, (int)s);
4794         s->mb_x=0;
4795         s->mb_y= mb_y;
4796
4797         ff_set_qscale(s, s->qscale);
4798         ff_init_block_index(s);
4799
4800         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4801             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4802             int mb_type= s->mb_type[xy];
4803 //            int d;
4804             int dmin= INT_MAX;
4805             int dir;
4806
4807             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4808                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4809                 return -1;
4810             }
4811             if(s->data_partitioning){
4812                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4813                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4814                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4815                     return -1;
4816                 }
4817             }
4818
4819             s->mb_x = mb_x;
4820             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4821             ff_update_block_index(s);
4822
4823 #ifdef CONFIG_H261_ENCODER
4824             if(s->codec_id == CODEC_ID_H261){
4825                 ff_h261_reorder_mb_index(s);
4826                 xy= s->mb_y*s->mb_stride + s->mb_x;
4827                 mb_type= s->mb_type[xy];
4828             }
4829 #endif
4830
4831             /* write gob / video packet header  */
4832             if(s->rtp_mode){
4833                 int current_packet_size, is_gob_start;
4834
4835                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4836
4837                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4838
4839                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4840
4841                 switch(s->codec_id){
4842                 case CODEC_ID_H263:
4843                 case CODEC_ID_H263P:
4844                     if(!s->h263_slice_structured)
4845                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4846                     break;
4847                 case CODEC_ID_MPEG2VIDEO:
4848                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4849                 case CODEC_ID_MPEG1VIDEO:
4850                     if(s->mb_skip_run) is_gob_start=0;
4851                     break;
4852                 }
4853
4854                 if(is_gob_start){
4855                     if(s->start_mb_y != mb_y || mb_x!=0){
4856                         write_slice_end(s);
4857
4858                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4859                             ff_mpeg4_init_partitions(s);
4860                         }
4861                     }
4862
4863                     assert((put_bits_count(&s->pb)&7) == 0);
4864                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4865
4866                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4867                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4868                         int d= 100 / s->avctx->error_rate;
4869                         if(r % d == 0){
4870                             current_packet_size=0;
4871 #ifndef ALT_BITSTREAM_WRITER
4872                             s->pb.buf_ptr= s->ptr_lastgob;
4873 #endif
4874                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4875                         }
4876                     }
4877
4878                     if (s->avctx->rtp_callback){
4879                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4880                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4881                     }
4882
4883                     switch(s->codec_id){
4884                     case CODEC_ID_MPEG4:
4885                         ff_mpeg4_encode_video_packet_header(s);
4886                         ff_mpeg4_clean_buffers(s);
4887                     break;
4888                     case CODEC_ID_MPEG1VIDEO:
4889                     case CODEC_ID_MPEG2VIDEO:
4890                         ff_mpeg1_encode_slice_header(s);
4891                         ff_mpeg1_clean_buffers(s);
4892                     break;
4893                     case CODEC_ID_H263:
4894                     case CODEC_ID_H263P:
4895                         h263_encode_gob_header(s, mb_y);
4896                     break;
4897                     }
4898
4899                     if(s->flags&CODEC_FLAG_PASS1){
4900                         int bits= put_bits_count(&s->pb);
4901                         s->misc_bits+= bits - s->last_bits;
4902                         s->last_bits= bits;
4903                     }
4904
4905                     s->ptr_lastgob += current_packet_size;
4906                     s->first_slice_line=1;
4907                     s->resync_mb_x=mb_x;
4908                     s->resync_mb_y=mb_y;
4909                 }
4910             }
4911
4912             if(  (s->resync_mb_x   == s->mb_x)
4913                && s->resync_mb_y+1 == s->mb_y){
4914                 s->first_slice_line=0;
4915             }
4916
4917             s->mb_skipped=0;
4918             s->dquant=0; //only for QP_RD
4919
4920             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4921                 int next_block=0;
4922                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4923
4924                 copy_context_before_encode(&backup_s, s, -1);
4925                 backup_s.pb= s->pb;
4926                 best_s.data_partitioning= s->data_partitioning;
4927                 best_s.partitioned_frame= s->partitioned_frame;
4928                 if(s->data_partitioning){
4929                     backup_s.pb2= s->pb2;
4930                     backup_s.tex_pb= s->tex_pb;
4931                 }
4932
4933                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4934                     s->mv_dir = MV_DIR_FORWARD;
4935                     s->mv_type = MV_TYPE_16X16;
4936                     s->mb_intra= 0;
4937                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4938                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4939                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4940                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4941                 }
4942                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4943                     s->mv_dir = MV_DIR_FORWARD;
4944                     s->mv_type = MV_TYPE_FIELD;
4945                     s->mb_intra= 0;
4946                     for(i=0; i<2; i++){
4947                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4948                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4949                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4950                     }
4951                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4952                                  &dmin, &next_block, 0, 0);
4953                 }
4954                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4955                     s->mv_dir = MV_DIR_FORWARD;
4956                     s->mv_type = MV_TYPE_16X16;
4957                     s->mb_intra= 0;
4958                     s->mv[0][0][0] = 0;
4959                     s->mv[0][0][1] = 0;
4960                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4961                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4962                 }
4963                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4964                     s->mv_dir = MV_DIR_FORWARD;
4965                     s->mv_type = MV_TYPE_8X8;
4966                     s->mb_intra= 0;
4967                     for(i=0; i<4; i++){
4968                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4969                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4970                     }
4971                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4972                                  &dmin, &next_block, 0, 0);
4973                 }
4974                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4975                     s->mv_dir = MV_DIR_FORWARD;
4976                     s->mv_type = MV_TYPE_16X16;
4977                     s->mb_intra= 0;
4978                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4979                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4980                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4981                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4982                 }
4983                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4984                     s->mv_dir = MV_DIR_BACKWARD;
4985                     s->mv_type = MV_TYPE_16X16;
4986                     s->mb_intra= 0;
4987                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4988                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4989                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4990                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4991                 }
4992                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4993                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4994                     s->mv_type = MV_TYPE_16X16;
4995                     s->mb_intra= 0;
4996                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4997                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4998                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4999                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5000                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5001                                  &dmin, &next_block, 0, 0);
5002                 }
5003                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5004                     int mx= s->b_direct_mv_table[xy][0];
5005                     int my= s->b_direct_mv_table[xy][1];
5006
5007                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5008                     s->mb_intra= 0;
5009                     ff_mpeg4_set_direct_mv(s, mx, my);
5010                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5011                                  &dmin, &next_block, mx, my);
5012                 }
5013                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5014                     s->mv_dir = MV_DIR_FORWARD;
5015                     s->mv_type = MV_TYPE_FIELD;
5016                     s->mb_intra= 0;
5017                     for(i=0; i<2; i++){
5018                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5019                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5020                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5021                     }
5022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5023                                  &dmin, &next_block, 0, 0);
5024                 }
5025                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5026                     s->mv_dir = MV_DIR_BACKWARD;
5027                     s->mv_type = MV_TYPE_FIELD;
5028                     s->mb_intra= 0;
5029                     for(i=0; i<2; i++){
5030                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5031                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5032                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5033                     }
5034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5035                                  &dmin, &next_block, 0, 0);
5036                 }
5037                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5038                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5039                     s->mv_type = MV_TYPE_FIELD;
5040                     s->mb_intra= 0;
5041                     for(dir=0; dir<2; dir++){
5042                         for(i=0; i<2; i++){
5043                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5044                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5045                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5046                         }
5047                     }
5048                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5049                                  &dmin, &next_block, 0, 0);
5050                 }
5051                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5052                     s->mv_dir = 0;
5053                     s->mv_type = MV_TYPE_16X16;
5054                     s->mb_intra= 1;
5055                     s->mv[0][0][0] = 0;
5056                     s->mv[0][0][1] = 0;
5057                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5058                                  &dmin, &next_block, 0, 0);
5059                     if(s->h263_pred || s->h263_aic){
5060                         if(best_s.mb_intra)
5061                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5062                         else
5063                             ff_clean_intra_table_entries(s); //old mode?
5064                     }
5065                 }
5066
5067                 if(s->flags & CODEC_FLAG_QP_RD){
5068                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5069                         const int last_qp= backup_s.qscale;
5070                         int dquant, dir, qp, dc[6];
5071                         DCTELEM ac[6][16];
5072                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5073
5074                         assert(backup_s.dquant == 0);
5075
5076                         //FIXME intra
5077                         s->mv_dir= best_s.mv_dir;
5078                         s->mv_type = MV_TYPE_16X16;
5079                         s->mb_intra= best_s.mb_intra;
5080                         s->mv[0][0][0] = best_s.mv[0][0][0];
5081                         s->mv[0][0][1] = best_s.mv[0][0][1];
5082                         s->mv[1][0][0] = best_s.mv[1][0][0];
5083                         s->mv[1][0][1] = best_s.mv[1][0][1];
5084
5085                         dir= s->pict_type == B_TYPE ? 2 : 1;
5086                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5087                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5088                             qp= last_qp + dquant;
5089                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5090                                 break;
5091                             backup_s.dquant= dquant;
5092                             if(s->mb_intra && s->dc_val[0]){
5093                                 for(i=0; i<6; i++){
5094                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5095                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5096                                 }
5097                             }
5098
5099                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5100                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5101                             if(best_s.qscale != qp){
5102                                 if(s->mb_intra && s->dc_val[0]){
5103                                     for(i=0; i<6; i++){
5104                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5105                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5106                                     }
5107                                 }
5108                                 if(dir > 0 && dquant==dir){
5109                                     dquant= 0;
5110                                     dir= -dir;
5111                                 }else
5112                                     break;
5113                             }
5114                         }
5115                         qp= best_s.qscale;
5116                         s->current_picture.qscale_table[xy]= qp;
5117                     }
5118                 }
5119
5120                 copy_context_after_encode(s, &best_s, -1);
5121
5122                 pb_bits_count= put_bits_count(&s->pb);
5123                 flush_put_bits(&s->pb);
5124                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5125                 s->pb= backup_s.pb;
5126
5127                 if(s->data_partitioning){
5128                     pb2_bits_count= put_bits_count(&s->pb2);
5129                     flush_put_bits(&s->pb2);
5130                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5131                     s->pb2= backup_s.pb2;
5132
5133                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5134                     flush_put_bits(&s->tex_pb);
5135                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5136                     s->tex_pb= backup_s.tex_pb;
5137                 }
5138                 s->last_bits= put_bits_count(&s->pb);
5139
5140                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5141                     ff_h263_update_motion_val(s);
5142
5143                 if(next_block==0){ //FIXME 16 vs linesize16
5144                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5145                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5146                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5147                 }
5148
5149                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5150                     MPV_decode_mb(s, s->block);
5151             } else {
5152                 int motion_x, motion_y;
5153                 s->mv_type=MV_TYPE_16X16;
5154                 // only one MB-Type possible
5155
5156                 switch(mb_type){
5157                 case CANDIDATE_MB_TYPE_INTRA:
5158                     s->mv_dir = 0;
5159                     s->mb_intra= 1;
5160                     motion_x= s->mv[0][0][0] = 0;
5161                     motion_y= s->mv[0][0][1] = 0;
5162                     break;
5163                 case CANDIDATE_MB_TYPE_INTER:
5164                     s->mv_dir = MV_DIR_FORWARD;
5165                     s->mb_intra= 0;
5166                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5167                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5168                     break;
5169                 case CANDIDATE_MB_TYPE_INTER_I:
5170                     s->mv_dir = MV_DIR_FORWARD;
5171                     s->mv_type = MV_TYPE_FIELD;
5172                     s->mb_intra= 0;
5173                     for(i=0; i<2; i++){
5174                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5175                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5176                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5177                     }
5178                     motion_x = motion_y = 0;
5179                     break;
5180                 case CANDIDATE_MB_TYPE_INTER4V:
5181                     s->mv_dir = MV_DIR_FORWARD;
5182                     s->mv_type = MV_TYPE_8X8;
5183                     s->mb_intra= 0;
5184                     for(i=0; i<4; i++){
5185                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5186                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5187                     }
5188                     motion_x= motion_y= 0;
5189                     break;
5190                 case CANDIDATE_MB_TYPE_DIRECT:
5191                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5192                     s->mb_intra= 0;
5193                     motion_x=s->b_direct_mv_table[xy][0];
5194                     motion_y=s->b_direct_mv_table[xy][1];
5195                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5196                     break;
5197                 case CANDIDATE_MB_TYPE_BIDIR:
5198                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5199                     s->mb_intra= 0;
5200                     motion_x=0;
5201                     motion_y=0;
5202                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5203                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5204                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5205                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5206                     break;
5207                 case CANDIDATE_MB_TYPE_BACKWARD:
5208                     s->mv_dir = MV_DIR_BACKWARD;
5209                     s->mb_intra= 0;
5210                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5211                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5212                     break;
5213                 case CANDIDATE_MB_TYPE_FORWARD:
5214                     s->mv_dir = MV_DIR_FORWARD;
5215                     s->mb_intra= 0;
5216                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5217                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5218 //                    printf(" %d %d ", motion_x, motion_y);
5219                     break;
5220                 case CANDIDATE_MB_TYPE_FORWARD_I:
5221                     s->mv_dir = MV_DIR_FORWARD;
5222                     s->mv_type = MV_TYPE_FIELD;
5223                     s->mb_intra= 0;
5224                     for(i=0; i<2; i++){
5225                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5226                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5227                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5228                     }
5229                     motion_x=motion_y=0;
5230                     break;
5231                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5232                     s->mv_dir = MV_DIR_BACKWARD;
5233                     s->mv_type = MV_TYPE_FIELD;
5234                     s->mb_intra= 0;
5235                     for(i=0; i<2; i++){
5236                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5237                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5238                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5239                     }
5240                     motion_x=motion_y=0;
5241                     break;
5242                 case CANDIDATE_MB_TYPE_BIDIR_I:
5243                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5244                     s->mv_type = MV_TYPE_FIELD;
5245                     s->mb_intra= 0;
5246                     for(dir=0; dir<2; dir++){
5247                         for(i=0; i<2; i++){
5248                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5249                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5250                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5251                         }
5252                     }
5253                     motion_x=motion_y=0;
5254                     break;
5255                 default:
5256                     motion_x=motion_y=0; //gcc warning fix
5257                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5258                 }
5259
5260                 encode_mb(s, motion_x, motion_y);
5261
5262                 // RAL: Update last macroblock type
5263                 s->last_mv_dir = s->mv_dir;
5264
5265                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5266                     ff_h263_update_motion_val(s);
5267
5268                 MPV_decode_mb(s, s->block);
5269             }
5270
5271             /* clean the MV table in IPS frames for direct mode in B frames */
5272             if(s->mb_intra /* && I,P,S_TYPE */){
5273                 s->p_mv_table[xy][0]=0;
5274                 s->p_mv_table[xy][1]=0;
5275             }
5276
5277             if(s->flags&CODEC_FLAG_PSNR){
5278                 int w= 16;
5279                 int h= 16;
5280
5281                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5282                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5283
5284                 s->current_picture.error[0] += sse(
5285                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5286                     s->dest[0], w, h, s->linesize);
5287                 s->current_picture.error[1] += sse(
5288                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5289                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5290                 s->current_picture.error[2] += sse(
5291                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5292                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5293             }
5294             if(s->loop_filter){
5295                 if(s->out_format == FMT_H263)
5296                     ff_h263_loop_filter(s);
5297             }
5298 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5299         }
5300     }
5301
5302     //not beautiful here but we must write it before flushing so it has to be here
5303     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5304         msmpeg4_encode_ext_header(s);
5305
5306     write_slice_end(s);
5307
5308     /* Send the last GOB if RTP */
5309     if (s->avctx->rtp_callback) {
5310         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5311         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5312         /* Call the RTP callback to send the last GOB */
5313         emms_c();
5314         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5315     }
5316
5317     return 0;
5318 }
5319
5320 #define MERGE(field) dst->field += src->field; src->field=0
5321 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5322     MERGE(me.scene_change_score);
5323     MERGE(me.mc_mb_var_sum_temp);
5324     MERGE(me.mb_var_sum_temp);
5325 }
5326
5327 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5328     int i;
5329
5330     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5331     MERGE(dct_count[1]);
5332     MERGE(mv_bits);
5333     MERGE(i_tex_bits);
5334     MERGE(p_tex_bits);
5335     MERGE(i_count);
5336     MERGE(f_count);
5337     MERGE(b_count);
5338     MERGE(skip_count);
5339     MERGE(misc_bits);
5340     MERGE(error_count);
5341     MERGE(padding_bug_score);
5342     MERGE(current_picture.error[0]);
5343     MERGE(current_picture.error[1]);
5344     MERGE(current_picture.error[2]);
5345
5346     if(dst->avctx->noise_reduction){
5347         for(i=0; i<64; i++){
5348             MERGE(dct_error_sum[0][i]);
5349             MERGE(dct_error_sum[1][i]);
5350         }
5351     }
5352
5353     assert(put_bits_count(&src->pb) % 8 ==0);
5354     assert(put_bits_count(&dst->pb) % 8 ==0);
5355     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5356     flush_put_bits(&dst->pb);
5357 }
5358
5359 static void estimate_qp(MpegEncContext *s, int dry_run){
5360     if (!s->fixed_qscale)
5361         s->current_picture_ptr->quality=
5362         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5363
5364     if(s->adaptive_quant){
5365         switch(s->codec_id){
5366         case CODEC_ID_MPEG4:
5367             ff_clean_mpeg4_qscales(s);
5368             break;
5369         case CODEC_ID_H263:
5370         case CODEC_ID_H263P:
5371         case CODEC_ID_FLV1:
5372             ff_clean_h263_qscales(s);
5373             break;
5374         }
5375
5376         s->lambda= s->lambda_table[0];
5377         //FIXME broken
5378     }else
5379         s->lambda= s->current_picture.quality;
5380 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5381     update_qscale(s);
5382 }
5383
/**
 * Encode one picture: run motion estimation (threaded), choose the picture
 * type and fcodes, set up quantization, write the codec-specific picture
 * header and then run the per-slice encode threads, merging their output.
 * Side effects: updates rate-control state, s->pict_type (scene-change
 * promotion to I), s->f_code/s->b_code, and appends to s->pb.
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* Rounding control: I frames reset it (msmpeg4 >= 3 always rounds up),
     * P/S frames toggle it for codecs using flipflop rounding. */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* Pre-ME quantizer guess: pass-2 asks the rate controller (dry run),
     * otherwise reuse the last lambda of the same picture-type class. */
    if(s->flags & CODEC_FLAG_PASS2){
        estimate_qp(s, 1);
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* Clone the main context into each worker thread context. */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* Collect ME statistics from the worker contexts back into the main one. */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* Scene change: promote a P frame to an I frame and mark all MBs intra. */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* Choose fcodes from the MV tables and clip over-long vectors
     * (skipped for H.263+ unrestricted MV mode). */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* f_code covers forward + bidir-forward MVs, b_code the backward ones. */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* Final quantizer decision now that ME statistics are available. */
    estimate_qp(s, 0);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* Write the codec-specific picture header and record its cost in bits. */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* Re-sync worker contexts with the post-ME state, encode all slices in
     * parallel, then merge the per-slice statistics and bitstreams. */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5596
5597 #endif //CONFIG_ENCODERS
5598
5599 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5600     const int intra= s->mb_intra;
5601     int i;
5602
5603     s->dct_count[intra]++;
5604
5605     for(i=0; i<64; i++){
5606         int level= block[i];
5607
5608         if(level){
5609             if(level>0){
5610                 s->dct_error_sum[intra][i] += level;
5611                 level -= s->dct_offset[intra][i];
5612                 if(level<0) level=0;
5613             }else{
5614                 s->dct_error_sum[intra][i] -= level;
5615                 level += s->dct_offset[intra][i];
5616                 if(level>0) level=0;
5617             }
5618             block[i]= level;
5619         }
5620     }
5621 }
5622
5623 #ifdef CONFIG_ENCODERS
5624
/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 * Runs the forward DCT, then for each coefficient considers the rounded
 * level and level-1 (or the escape code) and picks, via dynamic
 * programming over run/level candidates, the combination minimizing
 * distortion + lambda * bits.
 *
 * @param s        encoder context (supplies matrices, VLC length tables, lambda)
 * @param block    spatial-domain input; overwritten with quantized coefficients
 * @param n        block index (<4 selects luma DC scale, else chroma)
 * @param qscale   quantizer scale to use
 * @param overflow set to nonzero if a level exceeded s->max_qcoeff
 * @return index of the last nonzero coefficient, or -1/0-style sentinel
 *         matching the other dct_quantize implementations
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    // DP state: best run/level/score reaching each scan position (+1)
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];
    int survivor[65];
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    // up to 2 candidate levels per coefficient position
    int coeff[2][64];
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* Intra: quantize the DC separately and select intra matrices/tables;
     * inter: all 64 coefficients go through the trellis. */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* Scan backwards for the last coefficient that quantizes to nonzero. */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* Build the candidate levels: the rounded level and level-1 (signed),
     * or +/-1 for coefficients that quantize to zero. */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* Dynamic programming over scan positions: for each candidate level at
     * position i, try every surviving predecessor (run length = gap) and
     * keep the cheapest path; H.263-style VLCs additionally track the best
     * "this is the last coefficient" termination separately. */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* Reconstruct the dequantized value the decoder would see. */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            // level fits in the [-64,63] VLC range (encoded biased by 64)?
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                // out-of-range level: must use the escape code
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        /* Prune dominated survivors so the inner loops stay short. */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* MPEG-1 style VLCs have no special "last" codes: pick the cheapest
     * truncation point (paying ~2 bits for end-of-block) instead. */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* Special case: only the first coefficient survives (inter blocks) —
     * decide directly whether coding it beats dropping the whole block. */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* Backtrack the DP chain and write the chosen levels into the block
     * (in permuted scan order, as the bitstream writer expects). */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5912
5913 //#define REFINE_STATS 1
5914 static int16_t basis[64][64];
5915
5916 static void build_basis(uint8_t *perm){
5917     int i, j, x, y;
5918     emms_c();
5919     for(i=0; i<8; i++){
5920         for(j=0; j<8; j++){
5921             for(y=0; y<8; y++){
5922                 for(x=0; x<8; x++){
5923                     double s= 0.25*(1<<BASIS_SHIFT);
5924                     int index= 8*i + j;
5925                     int perm_index= perm[index];
5926                     if(i==0) s*= sqrt(0.5);
5927                     if(j==0) s*= sqrt(0.5);
5928                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5929                 }
5930             }
5931         }
5932     }
5933 }
5934
5935 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5936                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5937                         int n, int qscale){
5938     int16_t rem[64];
5939     DCTELEM d1[64] __align16;
5940     const int *qmat;
5941     const uint8_t *scantable= s->intra_scantable.scantable;
5942     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5943 //    unsigned int threshold1, threshold2;
5944 //    int bias=0;
5945     int run_tab[65];
5946     int prev_run=0;
5947     int prev_level=0;
5948     int qmul, qadd, start_i, last_non_zero, i, dc;
5949     uint8_t * length;
5950     uint8_t * last_length;
5951     int lambda;
5952     int rle_index, run, q, sum;
5953 #ifdef REFINE_STATS
5954 static int count=0;
5955 static int after_last=0;
5956 static int to_zero=0;
5957 static int from_zero=0;
5958 static int raise=0;
5959 static int lower=0;
5960 static int messed_sign=0;
5961 #endif
5962
5963     if(basis[0][0] == 0)
5964         build_basis(s->dsp.idct_permutation);
5965
5966     qmul= qscale*2;
5967     qadd= (qscale-1)|1;
5968     if (s->mb_intra) {
5969         if (!s->h263_aic) {
5970             if (n < 4)
5971                 q = s->y_dc_scale;
5972             else
5973                 q = s->c_dc_scale;
5974         } else{
5975             /* For AIC we skip quant/dequant of INTRADC */
5976             q = 1;
5977             qadd=0;
5978         }
5979         q <<= RECON_SHIFT-3;
5980         /* note: block[0] is assumed to be positive */
5981         dc= block[0]*q;
5982 //        block[0] = (block[0] + (q >> 1)) / q;
5983         start_i = 1;
5984         qmat = s->q_intra_matrix[qscale];
5985 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5986 //            bias= 1<<(QMAT_SHIFT-1);
5987         length     = s->intra_ac_vlc_length;
5988         last_length= s->intra_ac_vlc_last_length;
5989     } else {
5990         dc= 0;
5991         start_i = 0;
5992         qmat = s->q_inter_matrix[qscale];
5993         length     = s->inter_ac_vlc_length;
5994         last_length= s->inter_ac_vlc_last_length;
5995     }
5996     last_non_zero = s->block_last_index[n];
5997
5998 #ifdef REFINE_STATS
5999 {START_TIMER
6000 #endif
6001     dc += (1<<(RECON_SHIFT-1));
6002     for(i=0; i<64; i++){
6003         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
6004     }
6005 #ifdef REFINE_STATS
6006 STOP_TIMER("memset rem[]")}
6007 #endif
6008     sum=0;
6009     for(i=0; i<64; i++){
6010         int one= 36;
6011         int qns=4;
6012         int w;
6013
6014         w= ABS(weight[i]) + qns*one;
6015         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6016
6017         weight[i] = w;
6018 //        w=weight[i] = (63*qns + (w/2)) / w;
6019
6020         assert(w>0);
6021         assert(w<(1<<6));
6022         sum += w*w;
6023     }
6024     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6025 #ifdef REFINE_STATS
6026 {START_TIMER
6027 #endif
6028     run=0;
6029     rle_index=0;
6030     for(i=start_i; i<=last_non_zero; i++){
6031         int j= perm_scantable[i];
6032         const int level= block[j];
6033         int coeff;
6034
6035         if(level){
6036             if(level<0) coeff= qmul*level - qadd;
6037             else        coeff= qmul*level + qadd;
6038             run_tab[rle_index++]=run;
6039             run=0;
6040
6041             s->dsp.add_8x8basis(rem, basis[j], coeff);
6042         }else{
6043             run++;
6044         }
6045     }
6046 #ifdef REFINE_STATS
6047 if(last_non_zero>0){
6048 STOP_TIMER("init rem[]")
6049 }
6050 }
6051
6052 {START_TIMER
6053 #endif
6054     for(;;){
6055         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6056         int best_coeff=0;
6057         int best_change=0;
6058         int run2, best_unquant_change=0, analyze_gradient;
6059 #ifdef REFINE_STATS
6060 {START_TIMER
6061 #endif
6062         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6063
6064         if(analyze_gradient){
6065 #ifdef REFINE_STATS
6066 {START_TIMER
6067 #endif
6068             for(i=0; i<64; i++){
6069                 int w= weight[i];
6070
6071                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6072             }
6073 #ifdef REFINE_STATS
6074 STOP_TIMER("rem*w*w")}
6075 {START_TIMER
6076 #endif
6077             s->dsp.fdct(d1);
6078 #ifdef REFINE_STATS
6079 STOP_TIMER("dct")}
6080 #endif
6081         }
6082
6083         if(start_i){
6084             const int level= block[0];
6085             int change, old_coeff;
6086
6087             assert(s->mb_intra);
6088
6089             old_coeff= q*level;
6090
6091             for(change=-1; change<=1; change+=2){
6092                 int new_level= level + change;
6093                 int score, new_coeff;
6094
6095                 new_coeff= q*new_level;
6096                 if(new_coeff >= 2048 || new_coeff < 0)
6097                     continue;
6098
6099                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6100                 if(score<best_score){
6101                     best_score= score;
6102                     best_coeff= 0;
6103                     best_change= change;
6104                     best_unquant_change= new_coeff - old_coeff;
6105                 }
6106             }
6107         }
6108
6109         run=0;
6110         rle_index=0;
6111         run2= run_tab[rle_index++];
6112         prev_level=0;
6113         prev_run=0;
6114
6115         for(i=start_i; i<64; i++){
6116             int j= perm_scantable[i];
6117             const int level= block[j];
6118             int change, old_coeff;
6119
6120             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6121                 break;
6122
6123             if(level){
6124                 if(level<0) old_coeff= qmul*level - qadd;
6125                 else        old_coeff= qmul*level + qadd;
6126                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6127             }else{
6128                 old_coeff=0;
6129                 run2--;
6130                 assert(run2>=0 || i >= last_non_zero );
6131             }
6132
6133             for(change=-1; change<=1; change+=2){
6134                 int new_level= level + change;
6135                 int score, new_coeff, unquant_change;
6136
6137                 score=0;
6138                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6139                    continue;
6140
6141                 if(new_level){
6142                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6143                     else            new_coeff= qmul*new_level + qadd;
6144                     if(new_coeff >= 2048 || new_coeff <= -2048)
6145                         continue;
6146                     //FIXME check for overflow
6147
6148                     if(level){
6149                         if(level < 63 && level > -63){
6150                             if(i < last_non_zero)
6151                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6152                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6153                             else
6154                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6155                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6156                         }
6157                     }else{
6158                         assert(ABS(new_level)==1);
6159
6160                         if(analyze_gradient){
6161                             int g= d1[ scantable[i] ];
6162                             if(g && (g^new_level) >= 0)
6163                                 continue;
6164                         }
6165
6166                         if(i < last_non_zero){
6167                             int next_i= i + run2 + 1;
6168                             int next_level= block[ perm_scantable[next_i] ] + 64;
6169
6170                             if(next_level&(~127))
6171                                 next_level= 0;
6172
6173                             if(next_i < last_non_zero)
6174                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6175                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6176                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6177                             else
6178                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6179                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6180                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6181                         }else{
6182                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6183                             if(prev_level){
6184                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6185                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6186                             }
6187                         }
6188                     }
6189                 }else{
6190                     new_coeff=0;
6191                     assert(ABS(level)==1);
6192
6193                     if(i < last_non_zero){
6194                         int next_i= i + run2 + 1;
6195                         int next_level= block[ perm_scantable[next_i] ] + 64;
6196
6197                         if(next_level&(~127))
6198                             next_level= 0;
6199
6200                         if(next_i < last_non_zero)
6201                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6202                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6203                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6204                         else
6205                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6206                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6207                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6208                     }else{
6209                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6210                         if(prev_level){
6211                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6212                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6213                         }
6214                     }
6215                 }
6216
6217                 score *= lambda;
6218
6219                 unquant_change= new_coeff - old_coeff;
6220                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6221
6222                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6223                 if(score<best_score){
6224                     best_score= score;
6225                     best_coeff= i;
6226                     best_change= change;
6227                     best_unquant_change= unquant_change;
6228                 }
6229             }
6230             if(level){
6231                 prev_level= level + 64;
6232                 if(prev_level&(~127))
6233                     prev_level= 0;
6234                 prev_run= run;
6235                 run=0;
6236             }else{
6237                 run++;
6238             }
6239         }
6240 #ifdef REFINE_STATS
6241 STOP_TIMER("iterative step")}
6242 #endif
6243
6244         if(best_change){
6245             int j= perm_scantable[ best_coeff ];
6246
6247             block[j] += best_change;
6248
6249             if(best_coeff > last_non_zero){
6250                 last_non_zero= best_coeff;
6251                 assert(block[j]);
6252 #ifdef REFINE_STATS
6253 after_last++;
6254 #endif
6255             }else{
6256 #ifdef REFINE_STATS
6257 if(block[j]){
6258     if(block[j] - best_change){
6259         if(ABS(block[j]) > ABS(block[j] - best_change)){
6260             raise++;
6261         }else{
6262             lower++;
6263         }
6264     }else{
6265         from_zero++;
6266     }
6267 }else{
6268     to_zero++;
6269 }
6270 #endif
6271                 for(; last_non_zero>=start_i; last_non_zero--){
6272                     if(block[perm_scantable[last_non_zero]])
6273                         break;
6274                 }
6275             }
6276 #ifdef REFINE_STATS
6277 count++;
6278 if(256*256*256*64 % count == 0){
6279     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6280 }
6281 #endif
6282             run=0;
6283             rle_index=0;
6284             for(i=start_i; i<=last_non_zero; i++){
6285                 int j= perm_scantable[i];
6286                 const int level= block[j];
6287
6288                  if(level){
6289                      run_tab[rle_index++]=run;
6290                      run=0;
6291                  }else{
6292                      run++;
6293                  }
6294             }
6295
6296             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6297         }else{
6298             break;
6299         }
6300     }
6301 #ifdef REFINE_STATS
6302 if(last_non_zero>0){
6303 STOP_TIMER("iterative search")
6304 }
6305 }
6306 #endif
6307
6308     return last_non_zero;
6309 }
6310
/**
 * Forward-DCT and quantize one 8x8 block (plain C reference version).
 *
 * Runs s->dsp.fdct on the block, optionally applies DCT-domain noise
 * reduction, then quantizes every coefficient with the precomputed
 * fixed-point matrix row for qscale.  Intra blocks get their DC term
 * handled separately with the luma/chroma DC scale (or a fixed scale
 * for H.263 AIC).
 *
 * @param s        codec context (supplies DCT, matrices, scales, bias,
 *                 max_qcoeff)
 * @param block    spatial samples on input, quantized levels on output
 * @param n        block number; n < 4 selects the luma DC scale
 * @param qscale   quantizer scale, selects the quantization matrix row
 * @param overflow set to nonzero if a quantized level may exceed
 *                 s->max_qcoeff
 * @return scan-order index of the last non-zero coefficient
 */
static int dct_quantize_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block);

    /* optional DCT-domain denoising, driven by accumulated error stats */
    if(s->dct_error_sum)
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    /* The unsigned compare below is true iff
     * level < -threshold1 || level > threshold1 (dead-zone test)
     * done with a single comparison. */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* scan backwards: find the last coefficient that survives the
     * dead zone and zero out everything after it */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    /* quantize the surviving coefficients in scan order */
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            max |=level; /* bitwise OR of magnitudes: cheap upper bound on the max */
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6390
6391 #endif //CONFIG_ENCODERS
6392
6393 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6394                                    DCTELEM *block, int n, int qscale)
6395 {
6396     int i, level, nCoeffs;
6397     const uint16_t *quant_matrix;
6398
6399     nCoeffs= s->block_last_index[n];
6400
6401     if (n < 4)
6402         block[0] = block[0] * s->y_dc_scale;
6403     else
6404         block[0] = block[0] * s->c_dc_scale;
6405     /* XXX: only mpeg1 */
6406     quant_matrix = s->intra_matrix;
6407     for(i=1;i<=nCoeffs;i++) {
6408         int j= s->intra_scantable.permutated[i];
6409         level = block[j];
6410         if (level) {
6411             if (level < 0) {
6412                 level = -level;
6413                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6414                 level = (level - 1) | 1;
6415                 level = -level;
6416             } else {
6417                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6418                 level = (level - 1) | 1;
6419             }
6420             block[j] = level;
6421         }
6422     }
6423 }
6424
6425 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6426                                    DCTELEM *block, int n, int qscale)
6427 {
6428     int i, level, nCoeffs;
6429     const uint16_t *quant_matrix;
6430
6431     nCoeffs= s->block_last_index[n];
6432
6433     quant_matrix = s->inter_matrix;
6434     for(i=0; i<=nCoeffs; i++) {
6435         int j= s->intra_scantable.permutated[i];
6436         level = block[j];
6437         if (level) {
6438             if (level < 0) {
6439                 level = -level;
6440                 level = (((level << 1) + 1) * qscale *
6441                          ((int) (quant_matrix[j]))) >> 4;
6442                 level = (level - 1) | 1;
6443                 level = -level;
6444             } else {
6445                 level = (((level << 1) + 1) * qscale *
6446                          ((int) (quant_matrix[j]))) >> 4;
6447                 level = (level - 1) | 1;
6448             }
6449             block[j] = level;
6450         }
6451     }
6452 }
6453
6454 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6455                                    DCTELEM *block, int n, int qscale)
6456 {
6457     int i, level, nCoeffs;
6458     const uint16_t *quant_matrix;
6459
6460     if(s->alternate_scan) nCoeffs= 63;
6461     else nCoeffs= s->block_last_index[n];
6462
6463     if (n < 4)
6464         block[0] = block[0] * s->y_dc_scale;
6465     else
6466         block[0] = block[0] * s->c_dc_scale;
6467     quant_matrix = s->intra_matrix;
6468     for(i=1;i<=nCoeffs;i++) {
6469         int j= s->intra_scantable.permutated[i];
6470         level = block[j];
6471         if (level) {
6472             if (level < 0) {
6473                 level = -level;
6474                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6475                 level = -level;
6476             } else {
6477                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6478             }
6479             block[j] = level;
6480         }
6481     }
6482 }
6483
6484 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6485                                    DCTELEM *block, int n, int qscale)
6486 {
6487     int i, level, nCoeffs;
6488     const uint16_t *quant_matrix;
6489     int sum=-1;
6490
6491     if(s->alternate_scan) nCoeffs= 63;
6492     else nCoeffs= s->block_last_index[n];
6493
6494     quant_matrix = s->inter_matrix;
6495     for(i=0; i<=nCoeffs; i++) {
6496         int j= s->intra_scantable.permutated[i];
6497         level = block[j];
6498         if (level) {
6499             if (level < 0) {
6500                 level = -level;
6501                 level = (((level << 1) + 1) * qscale *
6502                          ((int) (quant_matrix[j]))) >> 4;
6503                 level = -level;
6504             } else {
6505                 level = (((level << 1) + 1) * qscale *
6506                          ((int) (quant_matrix[j]))) >> 4;
6507             }
6508             block[j] = level;
6509             sum+=level;
6510         }
6511     }
6512     block[63]^=sum&1;
6513 }
6514
6515 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6516                                   DCTELEM *block, int n, int qscale)
6517 {
6518     int i, level, qmul, qadd;
6519     int nCoeffs;
6520
6521     assert(s->block_last_index[n]>=0);
6522
6523     qmul = qscale << 1;
6524
6525     if (!s->h263_aic) {
6526         if (n < 4)
6527             block[0] = block[0] * s->y_dc_scale;
6528         else
6529             block[0] = block[0] * s->c_dc_scale;
6530         qadd = (qscale - 1) | 1;
6531     }else{
6532         qadd = 0;
6533     }
6534     if(s->ac_pred)
6535         nCoeffs=63;
6536     else
6537         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6538
6539     for(i=1; i<=nCoeffs; i++) {
6540         level = block[i];
6541         if (level) {
6542             if (level < 0) {
6543                 level = level * qmul - qadd;
6544             } else {
6545                 level = level * qmul + qadd;
6546             }
6547             block[i] = level;
6548         }
6549     }
6550 }
6551
6552 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6553                                   DCTELEM *block, int n, int qscale)
6554 {
6555     int i, level, qmul, qadd;
6556     int nCoeffs;
6557
6558     assert(s->block_last_index[n]>=0);
6559
6560     qadd = (qscale - 1) | 1;
6561     qmul = qscale << 1;
6562
6563     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6564
6565     for(i=0; i<=nCoeffs; i++) {
6566         level = block[i];
6567         if (level) {
6568             if (level < 0) {
6569                 level = level * qmul - qadd;
6570             } else {
6571                 level = level * qmul + qadd;
6572             }
6573             block[i] = level;
6574         }
6575     }
6576 }
6577
#ifdef CONFIG_ENCODERS
/*
 * Encoder registry.  Every encoder below reuses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points;
 * the codec ID selects the actual bitstream syntax at init time.
 * The leading initializers are positional (AVCodec declaration order),
 * the trailing ones are designated.
 */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY, /* output may be buffered/delayed, e.g. for B-frames */
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    /* note: YUVJ420P — JPEG (full) range, unlike the other encoders here */
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};

#endif //CONFIG_ENCODERS