/* libavcodec/mpegvideo.c (gitweb page-header text removed) */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
/* global hook for the edge-drawing routine; initialized to the C
   implementation, may be repointed by platform-specific init code */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
/* Post-scale factors for the AAN fast DCT (fdct_ifast), as 14-bit
 * fixed-point values ("precomputed values scaled up by 14 bits").
 * convert_matrix() folds these into the quantization multipliers so the
 * fast DCT's missing final scaling is compensated during quantization. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* H.263 chroma motion-vector rounding table, indexed by a 4-bit value
 * (presumably the low bits of a luma MV sum — the users of this table are
 * outside this chunk). */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity luma-qscale -> chroma-qscale mapping, installed as the default
 * by MPV_common_defaults() below */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
104
105 #ifdef CONFIG_ENCODERS
/* lazily allocated in MPV_encode_defaults() on first encoder init */
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
/* filled in MPV_encode_defaults(): fcode 1 for |MV| < 16, 0 elsewhere */
static uint8_t default_fcode_tab[MAX_MV*2+1];

/* -1 terminated list of the single supported encoder pixel format */
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
/**
 * Scans [p, end) for an MPEG start code prefix (0x000001), carrying partial
 * match state across calls in *state (the last bytes seen, big-endian).
 * @return pointer just past the start code byte, or end if none found;
 *         *state is updated so a subsequent call can resume the search
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first, finish any prefix match carried over in *state from a
       previous buffer */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* fast scan: examine every 3rd byte; the branch conditions rule out
       a 00 00 01 ending at p based on p[-1], p[-2], p[-3] only */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; /* p[-1] can't be part of 00 00 01 */
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            /* p[-3..-1] == 00 00 01: start code found */
            p++;
            break;
        }
    }

    /* record the last 4 bytes (clamped to the buffer) in *state and return
       the position just past them */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
260
261 /* init common dct for both encoder and decoder */
/* init common dct for both encoder and decoder: installs the C (un)quantize
   routines, lets platform-specific init override them, and builds the
   scantables with the chosen IDCT's coefficient permutation */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference implementations; the MPV_common_init_* calls below may
       replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly platform-optimized) quantizer as the fast variant
       before optionally switching to trellis quantization */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
319
/* shallow-copies a Picture and tags the destination as a copy so buffer
   ownership stays with the source */
static void copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
324
/* copies frame metadata (and, when me_threshold is set, the motion
   information arrays) from src to dst; the pixel data is NOT copied */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the caller-supplied frame must carry motion data; warn if not */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* per-direction (forward/backward) motion vectors and ref indices */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
363
364 /**
365  * allocates a Picture
366  * The pixels are allocated/set by calling get_buffer() if shared=0
367  */
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * Side tables (qscale, mb_type, motion vectors, ...) are allocated on the
 * first call for this Picture; CHECKED_ALLOCZ jumps to the fail label on OOM.
 * @return 0 on success, -1 on failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* the caller owns the pixel buffer; just tag the Picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant over the whole decoding run */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table serves as the "side tables already allocated" marker */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264: motion vectors at 4x4-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs: motion vectors at 8x8-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
448
449 /**
450  * deallocates a picture
451  */
452 static void free_picture(MpegEncContext *s, Picture *pic){
453     int i;
454
455     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
456         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
457     }
458
459     av_freep(&pic->mb_var);
460     av_freep(&pic->mc_mb_var);
461     av_freep(&pic->mb_mean);
462     av_freep(&pic->mbskip_table);
463     av_freep(&pic->qscale_table);
464     av_freep(&pic->mb_type_base);
465     av_freep(&pic->dct_coeff);
466     av_freep(&pic->pan_scan);
467     pic->mb_type= NULL;
468     for(i=0; i<2; i++){
469         av_freep(&pic->motion_val_base[i]);
470         av_freep(&pic->ref_index[i]);
471     }
472
473     if(pic->type == FF_BUFFER_TYPE_SHARED){
474         for(i=0; i<4; i++){
475             pic->base[i]=
476             pic->data[i]= NULL;
477         }
478         pic->type= 0;
479     }
480 }
481
/* allocates the per-thread scratch buffers of a (possibly duplicated)
   context; on OOM, returns -1 and relies on MPV_common_end() for cleanup */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads all alias the ME scratchpad */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    /* 12 blocks of 64 coefficients, double-buffered */
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
511
512 static void free_duplicate_context(MpegEncContext *s){
513     if(s==NULL) return;
514
515     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
516     av_freep(&s->me.scratchpad);
517     s->rd_scratchpad=
518     s->b_scratchpad=
519     s->obmc_scratchpad= NULL;
520
521     av_freep(&s->dct_error_sum);
522     av_freep(&s->me.map);
523     av_freep(&s->me.score_map);
524     av_freep(&s->blocks);
525     s->block= NULL;
526 }
527
/* saves the per-thread fields of src into bak so they survive the wholesale
   memcpy in ff_update_duplicate_context() */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
549
/* makes dst a copy of src while preserving dst's own thread-local buffers
   (scratchpads, block arrays, mb-row range) saved/restored around the memcpy */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks must point into dst's own block array, not src's */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
563
/* propagates the fields decided during motion estimation / header encoding
   from the main context to a thread context */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
580
581 /**
582  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585 static void MPV_common_defaults(MpegEncContext *s){
586     s->y_dc_scale_table=
587     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
588     s->chroma_qscale_table= ff_default_chroma_qscale_table;
589     s->progressive_frame= 1;
590     s->progressive_sequence= 1;
591     s->picture_structure= PICT_FRAME;
592
593     s->coded_picture_number = 0;
594     s->picture_number = 0;
595     s->input_picture_number = 0;
596
597     s->picture_in_gop_number = 0;
598
599     s->f_code = 1;
600     s->b_code = 1;
601 }
602
603 /**
604  * sets the given MpegEncContext to defaults for decoding.
605  * the changed fields will not depend upon the prior state of the MpegEncContext.
606  */
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding needs no defaults beyond the common ones */
    MPV_common_defaults(s);
}
610
611 /**
612  * sets the given MpegEncContext to defaults for encoding.
613  * the changed fields will not depend upon the prior state of the MpegEncContext.
614  */
615
616 #ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    /* guards the one-time init of the shared static tables below;
       NOTE(review): not thread-safe, and the av_mallocz result is not
       checked — verify callers never race here / handle OOM upstream */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers motion vectors in [-16, 15] */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
636 #endif //CONFIG_ENCODERS
637
638 /**
639  * init common structure for both encoder and decoder.
640  * this assumes that some variables like width/height are already set
641  */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    s->mb_height = (s->height + 15) / 16;

    /* threads are split by MB rows, so more threads than rows is invalid */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* strides are one column wider than the picture (guard column) */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* luma blocks (0-3) use the 8x8 stride, chroma blocks (4-5) the MB stride */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to its x + y*mb_stride position */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables; the +mb_stride+1 offset below skips the
           guard row/column so index 0 is the first real MB */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quant matrices for all 32 qscale values, filled by convert_matrix() */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the main context; the others get full copies plus their
       own scratch buffers from init_duplicate_context() */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* split the MB rows evenly (rounded) across the threads */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
830
/* free everything allocated by MPV_common_init() (common to encoder and decoder) */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* tear down per-thread duplicate contexts; slot 0 aliases s itself,
       so only slots >= 1 are heap-allocated and freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: each *_mv_table pointer points into the
       corresponding *_mv_table_base allocation, so only the bases are
       freed and the aliasing pointers are cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced (field) motion vector and field select tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction state (dc/ac values, coded-block and cbp tables) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture's buffers before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear dangling references into the freed picture array */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
914
915 #ifdef CONFIG_ENCODERS
916
917 /* init video encoder */
918 int MPV_encode_init(AVCodecContext *avctx)
919 {
920     MpegEncContext *s = avctx->priv_data;
921     int i;
922     int chroma_h_shift, chroma_v_shift;
923
924     MPV_encode_defaults(s);
925
926     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
927         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
928         return -1;
929     }
930
931     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
932         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
933             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
934             return -1;
935         }
936     }else{
937         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
938             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
939             return -1;
940         }
941     }
942
943     s->bit_rate = avctx->bit_rate;
944     s->width = avctx->width;
945     s->height = avctx->height;
946     if(avctx->gop_size > 600){
947         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
948         avctx->gop_size=600;
949     }
950     s->gop_size = avctx->gop_size;
951     s->avctx = avctx;
952     s->flags= avctx->flags;
953     s->flags2= avctx->flags2;
954     s->max_b_frames= avctx->max_b_frames;
955     s->codec_id= avctx->codec->id;
956     s->luma_elim_threshold  = avctx->luma_elim_threshold;
957     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
958     s->strict_std_compliance= avctx->strict_std_compliance;
959     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
960     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
961     s->mpeg_quant= avctx->mpeg_quant;
962     s->rtp_mode= !!avctx->rtp_payload_size;
963     s->intra_dc_precision= avctx->intra_dc_precision;
964     s->user_specified_pts = AV_NOPTS_VALUE;
965
966     if (s->gop_size <= 1) {
967         s->intra_only = 1;
968         s->gop_size = 12;
969     } else {
970         s->intra_only = 0;
971     }
972
973     s->me_method = avctx->me_method;
974
975     /* Fixed QSCALE */
976     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
977
978     s->adaptive_quant= (   s->avctx->lumi_masking
979                         || s->avctx->dark_masking
980                         || s->avctx->temporal_cplx_masking
981                         || s->avctx->spatial_cplx_masking
982                         || s->avctx->p_masking
983                         || s->avctx->border_masking
984                         || (s->flags&CODEC_FLAG_QP_RD))
985                        && !s->fixed_qscale;
986
987     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
988     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
989     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
990
991     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
992         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
993         return -1;
994     }
995
996     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
997         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
998     }
999
1000     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1001         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1002         return -1;
1003     }
1004
1005     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1006         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1007         return -1;
1008     }
1009
1010     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1011        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1012        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1013
1014         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1015     }
1016
1017     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1018        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1019         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1020         return -1;
1021     }
1022
1023     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1024         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1025         return -1;
1026     }
1027
1028     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1029         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1030         return -1;
1031     }
1032
1033     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1034         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1035         return -1;
1036     }
1037
1038     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1039         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1040         return -1;
1041     }
1042
1043     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1044         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1045         return -1;
1046     }
1047
1048     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1049        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1050         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1051         return -1;
1052     }
1053
1054     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1055         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1056         return -1;
1057     }
1058
1059     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1060         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1061         return -1;
1062     }
1063
1064     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1065         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1066         return -1;
1067     }
1068
1069     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1070         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1071         return -1;
1072     }
1073
1074     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1075        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1076        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1077         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1078         return -1;
1079     }
1080
1081     if(s->avctx->thread_count > 1)
1082         s->rtp_mode= 1;
1083
1084     if(!avctx->time_base.den || !avctx->time_base.num){
1085         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1086         return -1;
1087     }
1088
1089     i= (INT_MAX/2+128)>>8;
1090     if(avctx->me_threshold >= i){
1091         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1092         return -1;
1093     }
1094     if(avctx->mb_threshold >= i){
1095         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1096         return -1;
1097     }
1098
1099     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1100         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1101         return -1;
1102     }
1103
1104     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1105     if(i > 1){
1106         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1107         avctx->time_base.den /= i;
1108         avctx->time_base.num /= i;
1109 //        return -1;
1110     }
1111
1112     if(s->codec_id==CODEC_ID_MJPEG){
1113         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1114         s->inter_quant_bias= 0;
1115     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1116         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1117         s->inter_quant_bias= 0;
1118     }else{
1119         s->intra_quant_bias=0;
1120         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1121     }
1122
1123     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1124         s->intra_quant_bias= avctx->intra_quant_bias;
1125     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1126         s->inter_quant_bias= avctx->inter_quant_bias;
1127
1128     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1129
1130     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1131         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1132         return -1;
1133     }
1134     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1135
1136     switch(avctx->codec->id) {
1137     case CODEC_ID_MPEG1VIDEO:
1138         s->out_format = FMT_MPEG1;
1139         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1140         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1141         break;
1142     case CODEC_ID_MPEG2VIDEO:
1143         s->out_format = FMT_MPEG1;
1144         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1145         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1146         s->rtp_mode= 1;
1147         break;
1148     case CODEC_ID_LJPEG:
1149     case CODEC_ID_JPEGLS:
1150     case CODEC_ID_MJPEG:
1151         s->out_format = FMT_MJPEG;
1152         s->intra_only = 1; /* force intra only for jpeg */
1153         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1154         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1155         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1156         s->mjpeg_vsample[1] = 1;
1157         s->mjpeg_vsample[2] = 1;
1158         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1159         s->mjpeg_hsample[1] = 1;
1160         s->mjpeg_hsample[2] = 1;
1161         if (mjpeg_init(s) < 0)
1162             return -1;
1163         avctx->delay=0;
1164         s->low_delay=1;
1165         break;
1166     case CODEC_ID_H261:
1167         s->out_format = FMT_H261;
1168         avctx->delay=0;
1169         s->low_delay=1;
1170         break;
1171     case CODEC_ID_H263:
1172         if (h263_get_picture_format(s->width, s->height) == 7) {
1173             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1174             return -1;
1175         }
1176         s->out_format = FMT_H263;
1177         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1178         avctx->delay=0;
1179         s->low_delay=1;
1180         break;
1181     case CODEC_ID_H263P:
1182         s->out_format = FMT_H263;
1183         s->h263_plus = 1;
1184         /* Fx */
1185         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1186         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1187         s->modified_quant= s->h263_aic;
1188         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1189         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1190         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1191         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1192         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1193
1194         /* /Fx */
1195         /* These are just to be sure */
1196         avctx->delay=0;
1197         s->low_delay=1;
1198         break;
1199     case CODEC_ID_FLV1:
1200         s->out_format = FMT_H263;
1201         s->h263_flv = 2; /* format = 1; 11-bit codes */
1202         s->unrestricted_mv = 1;
1203         s->rtp_mode=0; /* don't allow GOB */
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_RV10:
1208         s->out_format = FMT_H263;
1209         avctx->delay=0;
1210         s->low_delay=1;
1211         break;
1212     case CODEC_ID_RV20:
1213         s->out_format = FMT_H263;
1214         avctx->delay=0;
1215         s->low_delay=1;
1216         s->modified_quant=1;
1217         s->h263_aic=1;
1218         s->h263_plus=1;
1219         s->loop_filter=1;
1220         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1221         break;
1222     case CODEC_ID_MPEG4:
1223         s->out_format = FMT_H263;
1224         s->h263_pred = 1;
1225         s->unrestricted_mv = 1;
1226         s->low_delay= s->max_b_frames ? 0 : 1;
1227         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1228         break;
1229     case CODEC_ID_MSMPEG4V1:
1230         s->out_format = FMT_H263;
1231         s->h263_msmpeg4 = 1;
1232         s->h263_pred = 1;
1233         s->unrestricted_mv = 1;
1234         s->msmpeg4_version= 1;
1235         avctx->delay=0;
1236         s->low_delay=1;
1237         break;
1238     case CODEC_ID_MSMPEG4V2:
1239         s->out_format = FMT_H263;
1240         s->h263_msmpeg4 = 1;
1241         s->h263_pred = 1;
1242         s->unrestricted_mv = 1;
1243         s->msmpeg4_version= 2;
1244         avctx->delay=0;
1245         s->low_delay=1;
1246         break;
1247     case CODEC_ID_MSMPEG4V3:
1248         s->out_format = FMT_H263;
1249         s->h263_msmpeg4 = 1;
1250         s->h263_pred = 1;
1251         s->unrestricted_mv = 1;
1252         s->msmpeg4_version= 3;
1253         s->flipflop_rounding=1;
1254         avctx->delay=0;
1255         s->low_delay=1;
1256         break;
1257     case CODEC_ID_WMV1:
1258         s->out_format = FMT_H263;
1259         s->h263_msmpeg4 = 1;
1260         s->h263_pred = 1;
1261         s->unrestricted_mv = 1;
1262         s->msmpeg4_version= 4;
1263         s->flipflop_rounding=1;
1264         avctx->delay=0;
1265         s->low_delay=1;
1266         break;
1267     case CODEC_ID_WMV2:
1268         s->out_format = FMT_H263;
1269         s->h263_msmpeg4 = 1;
1270         s->h263_pred = 1;
1271         s->unrestricted_mv = 1;
1272         s->msmpeg4_version= 5;
1273         s->flipflop_rounding=1;
1274         avctx->delay=0;
1275         s->low_delay=1;
1276         break;
1277     default:
1278         return -1;
1279     }
1280
1281     avctx->has_b_frames= !s->low_delay;
1282
1283     s->encoding = 1;
1284
1285     /* init */
1286     if (MPV_common_init(s) < 0)
1287         return -1;
1288
1289     if(s->modified_quant)
1290         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1291     s->progressive_frame=
1292     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1293     s->quant_precision=5;
1294
1295     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1296     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1297
1298 #ifdef CONFIG_H261_ENCODER
1299     if (s->out_format == FMT_H261)
1300         ff_h261_encode_init(s);
1301 #endif
1302     if (s->out_format == FMT_H263)
1303         h263_encode_init(s);
1304     if(s->msmpeg4_version)
1305         ff_msmpeg4_encode_init(s);
1306     if (s->out_format == FMT_MPEG1)
1307         ff_mpeg1_encode_init(s);
1308
1309     /* init q matrix */
1310     for(i=0;i<64;i++) {
1311         int j= s->dsp.idct_permutation[i];
1312         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1313             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1314             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1315         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1316             s->intra_matrix[j] =
1317             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1318         }else
1319         { /* mpeg1/2 */
1320             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1321             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1322         }
1323         if(s->avctx->intra_matrix)
1324             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1325         if(s->avctx->inter_matrix)
1326             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1327     }
1328
1329     /* precompute matrix */
1330     /* for mjpeg, we do include qscale in the matrix */
1331     if (s->out_format != FMT_MJPEG) {
1332         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1333                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1334         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1335                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1336     }
1337
1338     if(ff_rate_control_init(s) < 0)
1339         return -1;
1340
1341     return 0;
1342 }
1343
1344 int MPV_encode_end(AVCodecContext *avctx)
1345 {
1346     MpegEncContext *s = avctx->priv_data;
1347
1348 #ifdef STATS
1349     print_stats();
1350 #endif
1351
1352     ff_rate_control_uninit(s);
1353
1354     MPV_common_end(s);
1355     if (s->out_format == FMT_MJPEG)
1356         mjpeg_close(s);
1357
1358     av_freep(&avctx->extradata);
1359
1360     return 0;
1361 }
1362
1363 #endif //CONFIG_ENCODERS
1364
1365 void init_rl(RLTable *rl, int use_static)
1366 {
1367     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1368     uint8_t index_run[MAX_RUN+1];
1369     int last, run, level, start, end, i;
1370
1371     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1372     if(use_static && rl->max_level[0])
1373         return;
1374
1375     /* compute max_level[], max_run[] and index_run[] */
1376     for(last=0;last<2;last++) {
1377         if (last == 0) {
1378             start = 0;
1379             end = rl->last;
1380         } else {
1381             start = rl->last;
1382             end = rl->n;
1383         }
1384
1385         memset(max_level, 0, MAX_RUN + 1);
1386         memset(max_run, 0, MAX_LEVEL + 1);
1387         memset(index_run, rl->n, MAX_RUN + 1);
1388         for(i=start;i<end;i++) {
1389             run = rl->table_run[i];
1390             level = rl->table_level[i];
1391             if (index_run[run] == rl->n)
1392                 index_run[run] = i;
1393             if (level > max_level[run])
1394                 max_level[run] = level;
1395             if (run > max_run[level])
1396                 max_run[level] = run;
1397         }
1398         if(use_static)
1399             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1400         else
1401             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1402         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1403         if(use_static)
1404             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1405         else
1406             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1407         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1408         if(use_static)
1409             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1410         else
1411             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1412         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1413     }
1414 }
1415
/* Replicate the border pixels of a width x height image into a margin of
   w pixels on every side of the buffer (the margin must exist around buf). */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int k;

    /* copy the first row upwards and the last row downwards */
    for (k = 1; k <= w; k++) {
        memcpy(buf - k * wrap, buf, width);
        memcpy(bottom + k * wrap, bottom, width);
    }

    /* replicate the first and last columns sideways, row by row */
    row = buf;
    for (k = 0; k < height; k++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four corner areas from the corner pixels */
    for (k = 1; k <= w; k++) {
        memset(buf - k * wrap - w, buf[0], w);                   /* top left */
        memset(buf - k * wrap + width, buf[width - 1], w);       /* top right */
        memset(bottom + k * wrap - w, bottom[0], w);             /* bottom left */
        memset(bottom + k * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
1444
1445 int ff_find_unused_picture(MpegEncContext *s, int shared){
1446     int i;
1447
1448     if(shared){
1449         for(i=0; i<MAX_PICTURE_COUNT; i++){
1450             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1451         }
1452     }else{
1453         for(i=0; i<MAX_PICTURE_COUNT; i++){
1454             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1455         }
1456         for(i=0; i<MAX_PICTURE_COUNT; i++){
1457             if(s->picture[i].data[0]==NULL) return i;
1458         }
1459     }
1460
1461     assert(0);
1462     return -1;
1463 }
1464
1465 static void update_noise_reduction(MpegEncContext *s){
1466     int intra, i;
1467
1468     for(intra=0; intra<2; intra++){
1469         if(s->dct_count[intra] > (1<<16)){
1470             for(i=0; i<64; i++){
1471                 s->dct_error_sum[intra][i] >>=1;
1472             }
1473             s->dct_count[intra] >>= 1;
1474         }
1475
1476         for(i=0; i<64; i++){
1477             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1478         }
1479     }
1480 }
1481
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded.
 * Selects/allocates the current picture, rotates the last/next reference
 * picture pointers and installs the matching dequantizer functions.
 * @return 0 on success, -1 if picture allocation failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a reference picture that is neither next nor last should
                   not still be allocated at this point */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames (outside H.264) and dropable frames are never used as
           references; see the pict_type checks below */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  /* reference pointer rotation is skipped for H.264 (except SVQ3),
     which maintains its own reference lists */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a P/B frame with no reference available: allocate one anyway and
       retry rather than crashing on a missing keyframe */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: step through the frame buffer one field at a time
       by doubling the line sizes (and offsetting for the bottom field) */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1608
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* replicate the frame border into the edge area of reference frames so
       motion vectors pointing outside the picture predict from valid data */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember per-type statistics used by later rate-control decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1659
/**
 * Draws an anti-aliased line between (sx, sy) and (ex, ey) by adding a
 * weighted color value to the pixels it crosses (16.16 fixed point).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    /* clamp both endpoints into the picture
       NOTE(review): the fractional second write below can still touch one
       row (or column) past the clipped endpoint when it lies on the last
       row/column — callers draw into an over-allocated buffer; verify */
    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step in x, interpolate y; ex-sx is nonzero
           here because it is strictly greater than |ey-sy| >= 0 */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex;
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical (or single point): step in y, interpolate x */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1708
/**
 * Draws an arrow: a shaft between (sx, sy) and (ex, ey) plus two head
 * strokes at (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* keep the endpoints within a 100 pixel margin around the picture */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    vx= ex - sx;
    vy= ey - sy;

    /* only add the head strokes when the shaft is longer than 3 pixels */
    if(vx*vx + vy*vy > 3*3){
        /* head direction: shaft vector rotated by +/-45 degrees, then
           normalized to a fixed stroke length */
        int hx=  vx + vy;
        int hy= -vx + vy;
        int len= ff_sqrt((hx*hx + hy*hy)<<8);

        //FIXME subpixel accuracy
        hx= ROUNDED_DIV(hx*3<<4, len);
        hy= ROUNDED_DIV(hy*3<<4, len);

        draw_line(buf, sx, sy, sx + hx, sy + hy, w, h, stride, color);
        draw_line(buf, sx, sy, sx - hy, sy + hx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1741
/**
 * Prints debugging info for the given picture.
 * Depending on s->avctx->debug it emits a per-macroblock text dump (skip
 * count, qscale, macroblock type) via av_log, and depending on
 * s->avctx->debug / s->avctx->debug_mv it paints motion vectors, QP and
 * macroblock types directly into a copy of the picture
 * (s->visualization_buffer), redirecting pict->data to that copy.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* ---- textual per-macroblock dump ---- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, capped at one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");

                    /* interlacing marker (only meaningful for H.264 here) */
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* ---- visual overlay on a private copy of the picture ---- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* draw into s->visualization_buffer so the real reference planes
           stay untouched; pict->data is redirected to the copy */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vector arrows: type 0 = P forward,
                   type 1 = B forward, type 2 = B backward */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at its center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* paint the chroma planes with a gray level proportional
                   to the macroblock qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* colorize chroma by macroblock type; hue encodes the type */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    /* replicate the single U/V byte across 8 pixels */
                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    /* for 8x8 partitions: mark sub-splits where neighboring
                       motion vectors differ */
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
2002
2003 #ifdef CONFIG_ENCODERS
2004
/**
 * Returns the sum of absolute errors of a 16x16 block against a
 * constant reference value.
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum= 0;

    for(row=0; row<16; row++){
        for(col=0; col<16; col++){
            const int diff= src[col + row*stride] - ref;
            sum+= diff < 0 ? -diff : diff;
        }
    }

    return sum;
}
2017
2018 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2019     int x, y, w, h;
2020     int acc=0;
2021
2022     w= s->width &~15;
2023     h= s->height&~15;
2024
2025     for(y=0; y<h; y+=16){
2026         for(x=0; x<w; x+=16){
2027             int offset= x + y*stride;
2028             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2029             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2030             int sae = get_sae(src + offset, mean, stride);
2031
2032             acc+= sae + 500 < sad;
2033         }
2034     }
2035     return acc;
2036 }
2037
2038
/**
 * Takes one user-supplied input frame into the encoder's reordering
 * buffer (s->input_picture), either by referencing the caller's planes
 * ("direct") or by copying them into an internal picture.
 * Also assigns/validates the frame's pts.
 * @param pic_arg the frame to queue, or NULL to just shift the buffer
 *                (flush at end of stream)
 * @return 0 on success, -1 on invalid (non-increasing) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;

    /* --- pts bookkeeping: require strictly increasing user timestamps,
       or synthesize one when the frame has none --- */
    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                /* no pts on this frame: guess previous + 1 */
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct mode (no copy) only works when the caller preserves its
       buffer and the strides match ours exactly */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* share the caller's planes instead of copying */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        /* skip the copy when the caller already wrote into our buffer */
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* plane-by-plane copy, row by row when the strides differ */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2141
2142 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2143     int x, y, plane;
2144     int score=0;
2145     int64_t score64=0;
2146
2147     for(plane=0; plane<3; plane++){
2148         const int stride= p->linesize[plane];
2149         const int bw= plane ? 1 : 2;
2150         for(y=0; y<s->mb_height*bw; y++){
2151             for(x=0; x<s->mb_width*bw; x++){
2152                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2153                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2154
2155                 switch(s->avctx->frame_skip_exp){
2156                     case 0: score= FFMAX(score, v); break;
2157                     case 1: score+= ABS(v);break;
2158                     case 2: score+= v*v;break;
2159                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2160                     case 4: score64+= v*v*(int64_t)(v*v);break;
2161                 }
2162             }
2163         }
2164     }
2165
2166     if(score) score64= score;
2167
2168     if(score64 < s->avctx->frame_skip_threshold)
2169         return 1;
2170     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2171         return 1;
2172     return 0;
2173 }
2174
2175 static int estimate_best_b_count(MpegEncContext *s){
2176     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2177     AVCodecContext *c= avcodec_alloc_context();
2178     AVFrame input[FF_MAX_B_FRAMES+2];
2179     const int scale= s->avctx->brd_scale;
2180     int i, j, out_size, p_lambda, b_lambda, lambda2;
2181     int outbuf_size= s->width * s->height; //FIXME
2182     uint8_t *outbuf= av_malloc(outbuf_size);
2183     ImgReSampleContext *resample;
2184     int64_t best_rd= INT64_MAX;
2185     int best_b_count= -1;
2186
2187 //    emms_c();
2188     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2189     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2190     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2191     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2192
2193     c->width = s->width >> scale;
2194     c->height= s->height>> scale;
2195     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2196     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2197     c->mb_decision= s->avctx->mb_decision;
2198     c->me_cmp= s->avctx->me_cmp;
2199     c->mb_cmp= s->avctx->mb_cmp;
2200     c->me_sub_cmp= s->avctx->me_sub_cmp;
2201     c->pix_fmt = PIX_FMT_YUV420P;
2202     c->time_base= s->avctx->time_base;
2203     c->max_b_frames= s->max_b_frames;
2204
2205     if (avcodec_open(c, codec) < 0)
2206         return -1;
2207
2208     resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
2209
2210     for(i=0; i<s->max_b_frames+2; i++){
2211         int ysize= c->width*c->height;
2212         int csize= (c->width/2)*(c->height/2);
2213         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2214
2215         if(pre_input_ptr)
2216             pre_input= *pre_input_ptr;
2217
2218         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2219             pre_input.data[0]+=INPLACE_OFFSET;
2220             pre_input.data[1]+=INPLACE_OFFSET;
2221             pre_input.data[2]+=INPLACE_OFFSET;
2222         }
2223
2224         avcodec_get_frame_defaults(&input[i]);
2225         input[i].data[0]= av_malloc(ysize + 2*csize);
2226         input[i].data[1]= input[i].data[0] + ysize;
2227         input[i].data[2]= input[i].data[1] + csize;
2228         input[i].linesize[0]= c->width;
2229         input[i].linesize[1]=
2230         input[i].linesize[2]= c->width/2;
2231
2232         if(!i || s->input_picture[i-1])
2233             img_resample(resample, &input[i], &pre_input);
2234     }
2235
2236     for(j=0; j<s->max_b_frames+1; j++){
2237         int64_t rd=0;
2238
2239         if(!s->input_picture[j])
2240             break;
2241
2242         c->error[0]= c->error[1]= c->error[2]= 0;
2243
2244         input[0].pict_type= I_TYPE;
2245         input[0].quality= 1 * FF_QP2LAMBDA;
2246         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2247 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2248
2249         for(i=0; i<s->max_b_frames+1; i++){
2250             int is_p= i % (j+1) == j || i==s->max_b_frames;
2251
2252             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2253             input[i+1].quality= is_p ? p_lambda : b_lambda;
2254             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2255             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2256         }
2257
2258         /* get the delayed frames */
2259         while(out_size){
2260             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2261             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2262         }
2263
2264         rd += c->error[0] + c->error[1] + c->error[2];
2265
2266         if(rd < best_rd){
2267             best_rd= rd;
2268             best_b_count= j;
2269         }
2270     }
2271
2272     av_freep(&outbuf);
2273     avcodec_close(c);
2274     av_freep(&c);
2275     img_resample_close(resample);
2276
2277     for(i=0; i<s->max_b_frames+2; i++){
2278         av_freep(&input[i].data[0]);
2279     }
2280
2281     return best_b_count;
2282 }
2283
2284 static void select_input_picture(MpegEncContext *s){
2285     int i;
2286
2287     for(i=1; i<MAX_PICTURE_COUNT; i++)
2288         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2289     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2290
2291     /* set next picture type & ordering */
2292     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2293         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2294             s->reordered_input_picture[0]= s->input_picture[0];
2295             s->reordered_input_picture[0]->pict_type= I_TYPE;
2296             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2297         }else{
2298             int b_frames;
2299
2300             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2301                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2302                 //FIXME check that te gop check above is +-1 correct
2303 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2304
2305                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2306                         for(i=0; i<4; i++)
2307                             s->input_picture[0]->data[i]= NULL;
2308                         s->input_picture[0]->type= 0;
2309                     }else{
2310                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2311                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2312
2313                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2314                     }
2315
2316                     emms_c();
2317                     ff_vbv_update(s, 0);
2318
2319                     goto no_output_pic;
2320                 }
2321             }
2322
2323             if(s->flags&CODEC_FLAG_PASS2){
2324                 for(i=0; i<s->max_b_frames+1; i++){
2325                     int pict_num= s->input_picture[0]->display_picture_number + i;
2326
2327                     if(pict_num >= s->rc_context.num_entries)
2328                         break;
2329                     if(!s->input_picture[i]){
2330                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2331                         break;
2332                     }
2333
2334                     s->input_picture[i]->pict_type=
2335                         s->rc_context.entry[pict_num].new_pict_type;
2336                 }
2337             }
2338
2339             if(s->avctx->b_frame_strategy==0){
2340                 b_frames= s->max_b_frames;
2341                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2342             }else if(s->avctx->b_frame_strategy==1){
2343                 for(i=1; i<s->max_b_frames+1; i++){
2344                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2345                         s->input_picture[i]->b_frame_score=
2346                             get_intra_count(s, s->input_picture[i  ]->data[0],
2347                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2348                     }
2349                 }
2350                 for(i=0; i<s->max_b_frames+1; i++){
2351                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2352                 }
2353
2354                 b_frames= FFMAX(0, i-1);
2355
2356                 /* reset scores */
2357                 for(i=0; i<b_frames+1; i++){
2358                     s->input_picture[i]->b_frame_score=0;
2359                 }
2360             }else if(s->avctx->b_frame_strategy==2){
2361                 b_frames= estimate_best_b_count(s);
2362             }else{
2363                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2364                 b_frames=0;
2365             }
2366
2367             emms_c();
2368 //static int b_count=0;
2369 //b_count+= b_frames;
2370 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2371
2372             for(i= b_frames - 1; i>=0; i--){
2373                 int type= s->input_picture[i]->pict_type;
2374                 if(type && type != B_TYPE)
2375                     b_frames= i;
2376             }
2377             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2378                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2379             }
2380
2381             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2382               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2383                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2384               }else{
2385                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2386                     b_frames=0;
2387                 s->input_picture[b_frames]->pict_type= I_TYPE;
2388               }
2389             }
2390
2391             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2392                && b_frames
2393                && s->input_picture[b_frames]->pict_type== I_TYPE)
2394                 b_frames--;
2395
2396             s->reordered_input_picture[0]= s->input_picture[b_frames];
2397             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2398                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2399             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2400             for(i=0; i<b_frames; i++){
2401                 s->reordered_input_picture[i+1]= s->input_picture[i];
2402                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2403                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2404             }
2405         }
2406     }
2407 no_output_pic:
2408     if(s->reordered_input_picture[0]){
2409         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2410
2411         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2412
2413         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2415
2416             int i= ff_find_unused_picture(s, 0);
2417             Picture *pic= &s->picture[i];
2418
2419             /* mark us unused / free shared pic */
2420             for(i=0; i<4; i++)
2421                 s->reordered_input_picture[0]->data[i]= NULL;
2422             s->reordered_input_picture[0]->type= 0;
2423
2424             pic->reference              = s->reordered_input_picture[0]->reference;
2425
2426             alloc_picture(s, pic, 0);
2427
2428             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2429
2430             s->current_picture_ptr= pic;
2431         }else{
2432             // input is not a shared pix -> reuse buffer for current_pix
2433
2434             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2435                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2436
2437             s->current_picture_ptr= s->reordered_input_picture[0];
2438             for(i=0; i<4; i++){
2439                 s->new_picture.data[i]+= INPLACE_OFFSET;
2440             }
2441         }
2442         copy_picture(&s->current_picture, s->current_picture_ptr);
2443
2444         s->picture_number= s->new_picture.display_picture_number;
2445 //printf("dpn:%d\n", s->picture_number);
2446     }else{
2447        memset(&s->new_picture, 0, sizeof(Picture));
2448     }
2449 }
2450
/**
 * Main encoding entry point: queues the input frame and, once a picture is
 * ready after B-frame reordering, encodes it into buf.
 *
 * @param avctx codec context; only YUV420P/YUVJ420P input is accepted
 * @param buf output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data the input AVFrame
 * @return number of bytes written to buf (0 while the encoder is still
 *         buffering input for reordering), or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* split the output buffer between the slice threads, proportionally to
     * each thread's share of macroblock rows */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* picks the picture (after reordering) to be coded now, if any */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics to the public context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* VBV: append stuffing bytes if the rate controller asks for them */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4 stuffing starts with a 4-byte stuffing start code.
                 * NOTE(review): assumes stuffing_count >= 4 here; a value of
                 * 1..3 would make the loop below run with a negative counter —
                 * presumably ff_vbv_update guarantees this for MPEG-4, verify. */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the vbv_delay field of the already-written picture header
             * in place through vbv_delay_ptr */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture coded this call (reordering delay): nothing may have
         * been written to the bitstream */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    /* output is always byte-aligned */
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2572
2573 #endif //CONFIG_ENCODERS
2574
/**
 * Global motion compensation with a single warp point, i.e. a pure
 * translation (the "gmc1" fast path): predicts the 16x16 luma block and the
 * two 8x8 chroma blocks from ref_picture using s->sprite_offset.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    /* integer part of the offset, in luma samples */
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale so the fractional part sits in the low 4 bits (1/16 pel,
     * masked with &15 below) */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* a 17x17 source area is needed for a 16x16 block with a fractional
         * offset; replicate the picture border if it would be exceeded */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true fractional offset: interpolate with the gmc1 filter,
         * applied to the two 8-pixel-wide halves of the block */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* offset is a multiple of 1/2 pel: plain (no-rounding) half-pel copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return; // gray mode: luma only

    /* same procedure for chroma, at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1; // Cr uses the same position, so it needs emulation too
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2660
2661 static inline void gmc_motion(MpegEncContext *s,
2662                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2663                                uint8_t **ref_picture)
2664 {
2665     uint8_t *ptr;
2666     int linesize, uvlinesize;
2667     const int a= s->sprite_warping_accuracy;
2668     int ox, oy;
2669
2670     linesize = s->linesize;
2671     uvlinesize = s->uvlinesize;
2672
2673     ptr = ref_picture[0];
2674
2675     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2676     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2677
2678     s->dsp.gmc(dest_y, ptr, linesize, 16,
2679            ox,
2680            oy,
2681            s->sprite_delta[0][0], s->sprite_delta[0][1],
2682            s->sprite_delta[1][0], s->sprite_delta[1][1],
2683            a+1, (1<<(2*a+1)) - s->no_rounding,
2684            s->h_edge_pos, s->v_edge_pos);
2685     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2686            ox + s->sprite_delta[0][0]*8,
2687            oy + s->sprite_delta[1][0]*8,
2688            s->sprite_delta[0][0], s->sprite_delta[0][1],
2689            s->sprite_delta[1][0], s->sprite_delta[1][1],
2690            a+1, (1<<(2*a+1)) - s->no_rounding,
2691            s->h_edge_pos, s->v_edge_pos);
2692
2693     if(s->flags&CODEC_FLAG_GRAY) return;
2694
2695     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2696     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2697
2698     ptr = ref_picture[1];
2699     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2700            ox,
2701            oy,
2702            s->sprite_delta[0][0], s->sprite_delta[0][1],
2703            s->sprite_delta[1][0], s->sprite_delta[1][1],
2704            a+1, (1<<(2*a+1)) - s->no_rounding,
2705            s->h_edge_pos>>1, s->v_edge_pos>>1);
2706
2707     ptr = ref_picture[2];
2708     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2709            ox,
2710            oy,
2711            s->sprite_delta[0][0], s->sprite_delta[0][1],
2712            s->sprite_delta[1][0], s->sprite_delta[1][1],
2713            a+1, (1<<(2*a+1)) - s->no_rounding,
2714            s->h_edge_pos>>1, s->v_edge_pos>>1);
2715 }
2716
/**
 * Copies a rectangular block of samples into a temporary buffer, replicating
 * the border samples for the parts of the block that lie outside the source
 * picture.
 * @param buf destination buffer
 * @param src pointer to the top-left sample of the requested block inside
 *            the source picture (may address samples outside the picture)
 * @param linesize number of bytes between 2 vertically adjacent samples in
 *                 both the source and destination buffers
 * @param block_w width of the block
 * @param block_h height of the block
 * @param src_x x coordinate of the top left sample of the block in the source picture
 * @param src_y y coordinate of the top left sample of the block in the source picture
 * @param w width of the source picture
 * @param h height of the source picture
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* clamp a block that lies completely outside the picture so that at
     * least one row/column of real samples overlaps it */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* sub-rectangle of the block actually covered by the picture */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* copy the existing part */
    for(y = start_y; y < end_y; y++)
        for(x = start_x; x < end_x; x++)
            buf[x + y * linesize] = src[x + y * linesize];

    /* replicate the first valid row upwards */
    for(y = 0; y < start_y; y++)
        for(x = start_x; x < end_x; x++)
            buf[x + y * linesize] = buf[x + start_y * linesize];

    /* replicate the last valid row downwards */
    for(y = end_y; y < block_h; y++)
        for(x = start_x; x < end_x; x++)
            buf[x + y * linesize] = buf[x + (end_y - 1) * linesize];

    /* replicate the leftmost/rightmost valid columns sideways */
    for(y = 0; y < block_h; y++){
        for(x = 0; x < start_x; x++)
            buf[x + y * linesize] = buf[start_x + y * linesize];
        for(x = end_x; x < block_w; x++)
            buf[x + y * linesize] = buf[end_x - 1 + y * linesize];
    }
}
2787
2788 static inline int hpel_motion(MpegEncContext *s,
2789                                   uint8_t *dest, uint8_t *src,
2790                                   int field_based, int field_select,
2791                                   int src_x, int src_y,
2792                                   int width, int height, int stride,
2793                                   int h_edge_pos, int v_edge_pos,
2794                                   int w, int h, op_pixels_func *pix_op,
2795                                   int motion_x, int motion_y)
2796 {
2797     int dxy;
2798     int emu=0;
2799
2800     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2801     src_x += motion_x >> 1;
2802     src_y += motion_y >> 1;
2803
2804     /* WARNING: do no forget half pels */
2805     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2806     if (src_x == width)
2807         dxy &= ~1;
2808     src_y = clip(src_y, -16, height);
2809     if (src_y == height)
2810         dxy &= ~2;
2811     src += src_y * stride + src_x;
2812
2813     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2814         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2815            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2816             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2817                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2818             src= s->edge_emu_buffer;
2819             emu=1;
2820         }
2821     }
2822     if(field_select)
2823         src += s->linesize;
2824     pix_op[dxy](dest, src, stride, h);
2825     return emu;
2826 }
2827
2828 static inline int hpel_motion_lowres(MpegEncContext *s,
2829                                   uint8_t *dest, uint8_t *src,
2830                                   int field_based, int field_select,
2831                                   int src_x, int src_y,
2832                                   int width, int height, int stride,
2833                                   int h_edge_pos, int v_edge_pos,
2834                                   int w, int h, h264_chroma_mc_func *pix_op,
2835                                   int motion_x, int motion_y)
2836 {
2837     const int lowres= s->avctx->lowres;
2838     const int s_mask= (2<<lowres)-1;
2839     int emu=0;
2840     int sx, sy;
2841
2842     if(s->quarter_sample){
2843         motion_x/=2;
2844         motion_y/=2;
2845     }
2846
2847     sx= motion_x & s_mask;
2848     sy= motion_y & s_mask;
2849     src_x += motion_x >> (lowres+1);
2850     src_y += motion_y >> (lowres+1);
2851
2852     src += src_y * stride + src_x;
2853
2854     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2855        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2856         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2857                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2858         src= s->edge_emu_buffer;
2859         emu=1;
2860     }
2861
2862     sx <<= 2 - lowres;
2863     sy <<= 2 - lowres;
2864     if(field_select)
2865         src += s->linesize;
2866     pix_op[lowres](dest, src, stride, h, sx, sy);
2867     return emu;
2868 }
2869
/* apply one mpeg motion vector to the three components */
/**
 * Half-pel motion compensation of one macroblock (or one field of it):
 * predicts dest_y/dest_cb/dest_cr from ref_picture with the single vector
 * (motion_x, motion_y).  The chroma vector/position derivation depends on
 * the output format (H.263/H.261/MPEG) and chroma subsampling (420/422/444).
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* bit 0: horizontal half-pel, bit 1: vertical half-pel */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* workaround for encoders with buggy chroma rounding in field MC */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* Chroma420: chroma vector is half the luma vector in both axes */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            /* MPEG-1/2 does not allow vectors pointing outside the picture:
             * log and skip the MC instead of emulating the edge */
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        /* predict from the second field of the reference */
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2986
/* apply one mpeg motion vector to the three components */
/**
 * Lowres variant of mpeg_motion(): all positions are in reduced-resolution
 * samples (block_s = 8>>lowres per 8 full-res pixels) and sub-pel
 * interpolation is done with the h264 chroma MC functions.
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;               // luma block size in lowres samples (half of it)
    const int s_mask= (2<<lowres)-1;            // mask for the sub-pel bits of the vector
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the vector into integer lowres-sample and sub-pel parts */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma position/sub-pel fraction per output format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            /* block reaches outside the picture: use a border-replicated copy */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        /* predict from the second field of the reference */
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the sub-pel fraction to the units pix_op expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3083
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation for one 8x8 block: every output
 * pixel is a weighted average of five predictions — mid (the block's own
 * MV) and the predictions built with the top/left/right/bottom neighbours'
 * MVs.  The five weights at each pixel sum to 8 (the +4 makes >>3 round).
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* row 0 (OBMC_FILTER4 calls also cover the matching pixels of row 1) */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    /* remaining pixels of row 1 */
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    /* rows 2-3 */
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    /* rows 4-5 */
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    /* rows 6-7 */
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    /* remaining pixels of row 7 */
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3135
3136 /* obmc for 1 8x8 luma block */
3137 static inline void obmc_motion(MpegEncContext *s,
3138                                uint8_t *dest, uint8_t *src,
3139                                int src_x, int src_y,
3140                                op_pixels_func *pix_op,
3141                                int16_t mv[5][2]/* mid top left right bottom*/)
3142 #define MID    0
3143 {
3144     int i;
3145     uint8_t *ptr[5];
3146
3147     assert(s->quarter_sample==0);
3148
3149     for(i=0; i<5; i++){
3150         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3151             ptr[i]= ptr[MID];
3152         }else{
3153             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3154             hpel_motion(s, ptr[i], src, 0, 0,
3155                         src_x, src_y,
3156                         s->width, s->height, s->linesize,
3157                         s->h_edge_pos, s->v_edge_pos,
3158                         8, 8, pix_op,
3159                         mv[i][0], mv[i][1]);
3160         }
3161     }
3162
3163     put_obmc(dest, ptr, s->linesize);
3164 }
3165
/**
 * Quarter-pel motion compensation of one macroblock (luma qpel, chroma
 * halfpel), frame or field based. The chroma vector is derived from the
 * luma one; the FF_BUG_QPEL_CHROMA* branches reproduce the rounding of
 * known buggy encoders so their streams decode bit exactly.
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* quarter-pel fraction selects one of the 16 luma qpel functions */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    /* in field mode, strides double and vertical extents halve */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma vector from the luma one */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* halve again to halfpel, keeping any fractional part alive */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source block sticks out of the picture -> use a padded copy
       (unsigned compare also catches negative src_x/src_y) */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* 16x8 field block done as two 8x8 qpel operations */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3252
3253 inline int ff_h263_round_chroma(int x){
3254     if (x >= 0)
3255         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3256     else {
3257         x = -x;
3258         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3259     }
3260 }
3261
3262 /**
3263  * h263 chorma 4mv motion compensation.
3264  */
3265 static inline void chroma_4mv_motion(MpegEncContext *s,
3266                                      uint8_t *dest_cb, uint8_t *dest_cr,
3267                                      uint8_t **ref_picture,
3268                                      op_pixels_func *pix_op,
3269                                      int mx, int my){
3270     int dxy, emu=0, src_x, src_y, offset;
3271     uint8_t *ptr;
3272
3273     /* In case of 8X8, we construct a single chroma motion vector
3274        with a special rounding */
3275     mx= ff_h263_round_chroma(mx);
3276     my= ff_h263_round_chroma(my);
3277
3278     dxy = ((my & 1) << 1) | (mx & 1);
3279     mx >>= 1;
3280     my >>= 1;
3281
3282     src_x = s->mb_x * 8 + mx;
3283     src_y = s->mb_y * 8 + my;
3284     src_x = clip(src_x, -8, s->width/2);
3285     if (src_x == s->width/2)
3286         dxy &= ~1;
3287     src_y = clip(src_y, -8, s->height/2);
3288     if (src_y == s->height/2)
3289         dxy &= ~2;
3290
3291     offset = (src_y * (s->uvlinesize)) + src_x;
3292     ptr = ref_picture[1] + offset;
3293     if(s->flags&CODEC_FLAG_EMU_EDGE){
3294         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3295            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3296             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3297             ptr= s->edge_emu_buffer;
3298             emu=1;
3299         }
3300     }
3301     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3302
3303     ptr = ref_picture[2] + offset;
3304     if(emu){
3305         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3306         ptr= s->edge_emu_buffer;
3307     }
3308     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3309 }
3310
3311 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3312                                      uint8_t *dest_cb, uint8_t *dest_cr,
3313                                      uint8_t **ref_picture,
3314                                      h264_chroma_mc_func *pix_op,
3315                                      int mx, int my){
3316     const int lowres= s->avctx->lowres;
3317     const int block_s= 8>>lowres;
3318     const int s_mask= (2<<lowres)-1;
3319     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3320     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3321     int emu=0, src_x, src_y, offset, sx, sy;
3322     uint8_t *ptr;
3323
3324     if(s->quarter_sample){
3325         mx/=2;
3326         my/=2;
3327     }
3328
3329     /* In case of 8X8, we construct a single chroma motion vector
3330        with a special rounding */
3331     mx= ff_h263_round_chroma(mx);
3332     my= ff_h263_round_chroma(my);
3333
3334     sx= mx & s_mask;
3335     sy= my & s_mask;
3336     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3337     src_y = s->mb_y*block_s + (my >> (lowres+1));
3338
3339     offset = src_y * s->uvlinesize + src_x;
3340     ptr = ref_picture[1] + offset;
3341     if(s->flags&CODEC_FLAG_EMU_EDGE){
3342         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3343            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3344             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3345             ptr= s->edge_emu_buffer;
3346             emu=1;
3347         }
3348     }
3349     sx <<= 2 - lowres;
3350     sy <<= 2 - lowres;
3351     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3352
3353     ptr = ref_picture[2] + offset;
3354     if(emu){
3355         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3356         ptr= s->edge_emu_buffer;
3357     }
3358     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3359 }
3360
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->obmc && s->pict_type != B_TYPE){
        /* overlapped block MC: gather the 8x8-block vectors of this MB and
           its neighbours into a 4x4 cache, replicating across unavailable
           or intra neighbours */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        /* NOTE(review): row 3 intentionally duplicates row 2 (same
           mot_xy+mot_stride source) — presumably because the MB row below
           is not decoded yet; confirm before "fixing" */
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        /* one OBMC pass per 8x8 luma block, feeding it the mid vector and
           its four cached neighbours */
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        /* chroma uses the sum of the four mid vectors (rounded inside) */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (sprite warping) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do no forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        /* block sticks out of the picture -> padded copy */
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* accumulate at halfpel precision for the chroma vector */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture referencing the opposite-parity field of the
               frame being decoded -> take it from the current picture */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            /* same opposite-parity consideration as MV_TYPE_FIELD above */
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                /* second pass averages into the first (dual prime) */
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3609
/**
 * motion compensation of a single macroblock, lowres variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres; /* 8x8 luma block size after downscaling */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                /* sum the four luma vectors for the single chroma vector */
                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture referencing the opposite-parity field of the
               frame being decoded -> take it from the current picture */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            /* same opposite-parity consideration as MV_TYPE_FIELD above */
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                /* second pass averages into the first (dual prime) */
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3735
/* dequantize block[] with the intra unquantizer, then idct and
   put the result to dest[] (overwrites, no add) */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3743
3744 /* add block[] to dest[] */
3745 static inline void add_dct(MpegEncContext *s,
3746                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3747 {
3748     if (s->block_last_index[i] >= 0) {
3749         s->dsp.idct_add (dest, line_size, block);
3750     }
3751 }
3752
3753 static inline void add_dequant_dct(MpegEncContext *s,
3754                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3755 {
3756     if (s->block_last_index[i] >= 0) {
3757         s->dct_unquantize_inter(s, block, i, qscale);
3758
3759         s->dsp.idct_add (dest, line_size, block);
3760     }
3761 }
3762
3763 /**
3764  * cleans dc, ac, coded_block for the current non intra MB
3765  */
3766 void ff_clean_intra_table_entries(MpegEncContext *s)
3767 {
3768     int wrap = s->b8_stride;
3769     int xy = s->block_index[0];
3770
3771     s->dc_val[0][xy           ] =
3772     s->dc_val[0][xy + 1       ] =
3773     s->dc_val[0][xy     + wrap] =
3774     s->dc_val[0][xy + 1 + wrap] = 1024;
3775     /* ac pred */
3776     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3777     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3778     if (s->msmpeg4_version>=3) {
3779         s->coded_block[xy           ] =
3780         s->coded_block[xy + 1       ] =
3781         s->coded_block[xy     + wrap] =
3782         s->coded_block[xy + 1 + wrap] = 0;
3783     }
3784     /* chroma */
3785     wrap = s->mb_stride;
3786     xy = s->mb_x + s->mb_y * wrap;
3787     s->dc_val[1][xy] =
3788     s->dc_val[2][xy] = 1024;
3789     /* ac pred */
3790     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3791     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3792
3793     s->mbintra_table[xy]= 0;
3794 }
3795
3796 /* generic function called after a macroblock has been parsed by the
3797    decoder or after it has been encoded by the encoder.
3798
3799    Important variables used:
3800    s->mb_intra : true if intra macroblock
3801    s->mv_dir   : motion vector direction
3802    s->mv_type  : motion vector type
3803    s->mv       : motion vector
3804    s->interlaced_dct : true if interlaced dct used (mpeg2)
3805  */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* with XvMC acceleration the hardware consumes the blocks directly */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            /* h263-style prediction: only clean up if the previous MB at
               this position was intra */
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            /* mpeg-style: reset all three DC predictors to the mid value */
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        /* if not readable, reconstruct into the scratchpad and copy back at the end */
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* for interlaced DCT the two luma rows of blocks are interleaved
           field-wise: double the stride, offset by one line */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        /* after the forward prediction is in place, the backward
                           one must be averaged in, not stored */
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    /* B frames always use rounding, otherwise follow no_rounding */
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* these codecs store still-quantized coefficients:
                   dequantize while adding */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* coefficients are already dequantized */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        /* recompute stride/offset for the taller chroma plane */
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                /* mpeg1/2 intra blocks are already dequantized: idct directly */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* reconstruction went to the scratchpad, copy it to the real picture */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4029
4030 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4031     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4032     else                  MPV_decode_mb_internal(s, block, 0);
4033 }
4034
4035 #ifdef CONFIG_ENCODERS
4036
4037 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4038 {
4039     static const char tab[64]=
4040         {3,2,2,1,1,1,1,1,
4041          1,1,1,1,1,1,1,1,
4042          1,1,1,1,1,1,1,1,
4043          0,0,0,0,0,0,0,0,
4044          0,0,0,0,0,0,0,0,
4045          0,0,0,0,0,0,0,0,
4046          0,0,0,0,0,0,0,0,
4047          0,0,0,0,0,0,0,0};
4048     int score=0;
4049     int run=0;
4050     int i;
4051     DCTELEM *block= s->block[n];
4052     const int last_index= s->block_last_index[n];
4053     int skip_dc;
4054
4055     if(threshold<0){
4056         skip_dc=0;
4057         threshold= -threshold;
4058     }else
4059         skip_dc=1;
4060
4061     /* are all which we could set to zero are allready zero? */
4062     if(last_index<=skip_dc - 1) return;
4063
4064     for(i=0; i<=last_index; i++){
4065         const int j = s->intra_scantable.permutated[i];
4066         const int level = ABS(block[j]);
4067         if(level==1){
4068             if(skip_dc && i==0) continue;
4069             score+= tab[run];
4070             run=0;
4071         }else if(level>1){
4072             return;
4073         }else{
4074             run++;
4075         }
4076     }
4077     if(score >= threshold) return;
4078     for(i=skip_dc; i<=last_index; i++){
4079         const int j = s->intra_scantable.permutated[i];
4080         block[j]=0;
4081     }
4082     if(block[0]) s->block_last_index[n]= 0;
4083     else         s->block_last_index[n]= -1;
4084 }
4085
4086 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4087 {
4088     int i;
4089     const int maxlevel= s->max_qcoeff;
4090     const int minlevel= s->min_qcoeff;
4091     int overflow=0;
4092
4093     if(s->mb_intra){
4094         i=1; //skip clipping of intra dc
4095     }else
4096         i=0;
4097
4098     for(;i<=last_index; i++){
4099         const int j= s->intra_scantable.permutated[i];
4100         int level = block[j];
4101
4102         if     (level>maxlevel){
4103             level=maxlevel;
4104             overflow++;
4105         }else if(level<minlevel){
4106             level=minlevel;
4107             overflow++;
4108         }
4109
4110         block[j]= level;
4111     }
4112
4113     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4114         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4115 }
4116
4117 #endif //CONFIG_ENCODERS
4118
4119 /**
4120  *
4121  * @param h is the normal height, this will be reduced automatically if needed for the last row
4122  */
4123 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4124     if (s->avctx->draw_horiz_band) {
4125         AVFrame *src;
4126         int offset[4];
4127
4128         if(s->picture_structure != PICT_FRAME){
4129             h <<= 1;
4130             y <<= 1;
4131             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4132         }
4133
4134         h= FFMIN(h, s->avctx->height - y);
4135
4136         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4137             src= (AVFrame*)s->current_picture_ptr;
4138         else if(s->last_picture_ptr)
4139             src= (AVFrame*)s->last_picture_ptr;
4140         else
4141             return;
4142
4143         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4144             offset[0]=
4145             offset[1]=
4146             offset[2]=
4147             offset[3]= 0;
4148         }else{
4149             offset[0]= y * s->linesize;;
4150             offset[1]=
4151             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4152             offset[3]= 0;
4153         }
4154
4155         emms_c();
4156
4157         s->avctx->draw_horiz_band(s->avctx, src, offset,
4158                                   y, s->picture_structure, h);
4159     }
4160 }
4161
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    /* macroblock size in log2: 16 pixels normally, halved per lowres step */
    const int mb_size= 4 - s->avctx->lowres;

    /* luma 8x8 block indices: two per row, two rows, offset -2/-1 so that
       block_index points one 8x8 column left of the current macroblock */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma indices live after the luma area (b8_stride*mb_height*2) */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    /* dest starts one macroblock left of the current position (mb_x - 1) */
    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_y_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        /* advance to the current macroblock row */
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4186
4187 #ifdef CONFIG_ENCODERS
4188
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    /* per-pixel visual masking weight from local activity in an 8x8 block:
       higher variance in the (clipped) 3x3 neighbourhood -> higher weight */
    for(y=0; y<8; y++){
        for(x=0; x<8; x++){
            const int y0= FFMAX(y-1, 0), y1= FFMIN(8, y+2);
            const int x0= FFMAX(x-1, 0), x1= FFMIN(8, x+2);
            int sum=0, sqr=0, count=0;
            int yy, xx;

            for(yy=y0; yy<y1; yy++){
                for(xx=x0; xx<x1; xx++){
                    const int v= ptr[xx + yy*stride];
                    sum   += v;
                    sqr   += v*v;
                    count++;
                }
            }
            /* count*sqr - sum*sum is count^2 times the variance */
            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4211
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    /* per-macroblock quantizer update */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* h263 only allows a qscale delta of +-2 */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* mpeg4 B frames: even dquant only, none for direct MBs */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* no dquant allowed with 4MV */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixels of the current macroblock */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* macroblock sticks out of the picture: read via the edge emulation buffer */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* choose between frame and field DCT by comparing ildct_cmp scores */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    /* interleave the two fields: stride doubled, rows offset by 1 */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* intra: copy source pixels straight into the DCT blocks */
        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        /* B frames always use rounding, otherwise follow no_rounding */
        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            /* a backward prediction must be averaged with the forward one */
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* same frame/field DCT decision as above, but on the residual */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* inter: DCT blocks hold the prediction error (source - prediction) */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* cheap SAD test to drop blocks whose residual would quantize to zero */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs visual weights plus the unquantized coefficients */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop cheap isolated coefficients in inter blocks */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray encoding: force chroma blocks to DC-only mid gray */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                /* rescan backwards for the true last non-zero coefficient */
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4473
4474 #endif //CONFIG_ENCODERS
4475
4476 void ff_mpeg_flush(AVCodecContext *avctx){
4477     int i;
4478     MpegEncContext *s = avctx->priv_data;
4479
4480     if(s==NULL || s->picture==NULL)
4481         return;
4482
4483     for(i=0; i<MAX_PICTURE_COUNT; i++){
4484        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4485                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4486         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4487     }
4488     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4489
4490     s->mb_x= s->mb_y= 0;
4491
4492     s->parse_context.state= -1;
4493     s->parse_context.frame_start_found= 0;
4494     s->parse_context.overread= 0;
4495     s->parse_context.overread_index= 0;
4496     s->parse_context.index= 0;
4497     s->parse_context.last_index= 0;
4498     s->bitstream_buffer_size=0;
4499 }
4500
4501 #ifdef CONFIG_ENCODERS
4502 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4503 {
4504     const uint16_t *srcw= (uint16_t*)src;
4505     int words= length>>4;
4506     int bits= length&15;
4507     int i;
4508
4509     if(length==0) return;
4510
4511     if(words < 16){
4512         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4513     }else if(put_bits_count(pb)&7){
4514         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4515     }else{
4516         for(i=0; put_bits_count(pb)&31; i++)
4517             put_bits(pb, 8, src[i]);
4518         flush_put_bits(pb);
4519         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4520         skip_put_bytes(pb, 2*words-i);
4521     }
4522
4523     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4524 }
4525
4526 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4527     int i;
4528
4529     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4530
4531     /* mpeg1 */
4532     d->mb_skip_run= s->mb_skip_run;
4533     for(i=0; i<3; i++)
4534         d->last_dc[i]= s->last_dc[i];
4535
4536     /* statistics */
4537     d->mv_bits= s->mv_bits;
4538     d->i_tex_bits= s->i_tex_bits;
4539     d->p_tex_bits= s->p_tex_bits;
4540     d->i_count= s->i_count;
4541     d->f_count= s->f_count;
4542     d->b_count= s->b_count;
4543     d->skip_count= s->skip_count;
4544     d->misc_bits= s->misc_bits;
4545     d->last_bits= 0;
4546
4547     d->mb_skipped= 0;
4548     d->qscale= s->qscale;
4549     d->dquant= s->dquant;
4550 }
4551
4552 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4553     int i;
4554
4555     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4556     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4557
4558     /* mpeg1 */
4559     d->mb_skip_run= s->mb_skip_run;
4560     for(i=0; i<3; i++)
4561         d->last_dc[i]= s->last_dc[i];
4562
4563     /* statistics */
4564     d->mv_bits= s->mv_bits;
4565     d->i_tex_bits= s->i_tex_bits;
4566     d->p_tex_bits= s->p_tex_bits;
4567     d->i_count= s->i_count;
4568     d->f_count= s->f_count;
4569     d->b_count= s->b_count;
4570     d->skip_count= s->skip_count;
4571     d->misc_bits= s->misc_bits;
4572
4573     d->mb_intra= s->mb_intra;
4574     d->mb_skipped= s->mb_skipped;
4575     d->mv_type= s->mv_type;
4576     d->mv_dir= s->mv_dir;
4577     d->pb= s->pb;
4578     if(s->data_partitioning){
4579         d->pb2= s->pb2;
4580         d->tex_pb= s->tex_pb;
4581     }
4582     d->block= s->block;
4583     for(i=0; i<6; i++)
4584         d->block_last_index[i]= s->block_last_index[i];
4585     d->interlaced_dct= s->interlaced_dct;
4586     d->qscale= s->qscale;
4587 }
4588
/**
 * Encode one macroblock as a trial candidate for the high-quality
 * (rate- or RD-based) mode decision, and keep it if it beats the
 * best score so far.
 *
 * Two sets of bit buffers / reconstruction destinations are double-buffered
 * via *next_block (0 or 1): the current trial writes into set *next_block,
 * while the best-so-far candidate lives in the other set.
 *
 * @param s          context to encode with (state is reset from 'backup' first)
 * @param backup     pristine pre-encode state saved by the caller
 * @param best       receives the winning candidate's state (via
 *                   copy_context_after_encode) when this trial wins
 * @param type       candidate MB type (forwarded to the context-copy helpers)
 * @param pb/pb2/tex_pb  double-buffered bitstream writers (pb2/tex_pb only
 *                   used with data partitioning)
 * @param dmin       in/out: best score so far; lowered if this trial wins
 * @param next_block in/out: index of the trial buffer set; flipped on a win
 * @param motion_x/motion_y  motion vector passed to encode_mb()
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore encoder state to the common pre-trial snapshot */
    copy_context_before_encode(s, backup, type);

    /* write this trial into its own buffer set */
    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* buffer set 1 reconstructs into the scratchpad so the picture
           still holds the set-0 (best-so-far) reconstruction */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate: bits produced by this trial (all partitions) */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full RD: reconstruct and add lambda-weighted distortion */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    /* undo the scratchpad redirection */
    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;   /* this trial becomes the best: swap buffer sets */

        copy_context_after_encode(best, s, type);
    }
}
4639
4640 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4641     uint32_t *sq = squareTbl + 256;
4642     int acc=0;
4643     int x,y;
4644
4645     if(w==16 && h==16)
4646         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4647     else if(w==8 && h==8)
4648         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4649
4650     for(y=0; y<h; y++){
4651         for(x=0; x<w; x++){
4652             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4653         }
4654     }
4655
4656     assert(acc>=0);
4657
4658     return acc;
4659 }
4660
4661 static int sse_mb(MpegEncContext *s){
4662     int w= 16;
4663     int h= 16;
4664
4665     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4666     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4667
4668     if(w==16 && h==16)
4669       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4670         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4671                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4672                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4673       }else{
4674         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4675                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4676                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4677       }
4678     else
4679         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4680                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4681                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4682 }
4683
4684 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4685     MpegEncContext *s= arg;
4686
4687
4688     s->me.pre_pass=1;
4689     s->me.dia_size= s->avctx->pre_dia_size;
4690     s->first_slice_line=1;
4691     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4692         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4693             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4694         }
4695         s->first_slice_line=0;
4696     }
4697
4698     s->me.pre_pass=0;
4699
4700     return 0;
4701 }
4702
4703 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4704     MpegEncContext *s= arg;
4705
4706     s->me.dia_size= s->avctx->dia_size;
4707     s->first_slice_line=1;
4708     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4709         s->mb_x=0; //for block init below
4710         ff_init_block_index(s);
4711         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4712             s->block_index[0]+=2;
4713             s->block_index[1]+=2;
4714             s->block_index[2]+=2;
4715             s->block_index[3]+=2;
4716
4717             /* compute motion vector & mb_type and store in context */
4718             if(s->pict_type==B_TYPE)
4719                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4720             else
4721                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4722         }
4723         s->first_slice_line=0;
4724     }
4725     return 0;
4726 }
4727
4728 static int mb_var_thread(AVCodecContext *c, void *arg){
4729     MpegEncContext *s= arg;
4730     int mb_x, mb_y;
4731
4732     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4733         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4734             int xx = mb_x * 16;
4735             int yy = mb_y * 16;
4736             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4737             int varc;
4738             int sum = s->dsp.pix_sum(pix, s->linesize);
4739
4740             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4741
4742             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4743             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4744             s->me.mb_var_sum_temp    += varc;
4745         }
4746     }
4747     return 0;
4748 }
4749
4750 static void write_slice_end(MpegEncContext *s){
4751     if(s->codec_id==CODEC_ID_MPEG4){
4752         if(s->partitioned_frame){
4753             ff_mpeg4_merge_partitions(s);
4754         }
4755
4756         ff_mpeg4_stuffing(&s->pb);
4757     }else if(s->out_format == FMT_MJPEG){
4758         ff_mjpeg_stuffing(&s->pb);
4759     }
4760
4761     align_put_bits(&s->pb);
4762     flush_put_bits(&s->pb);
4763
4764     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4765         s->misc_bits+= get_bits_diff(s);
4766 }
4767
4768 static int encode_thread(AVCodecContext *c, void *arg){
4769     MpegEncContext *s= arg;
4770     int mb_x, mb_y, pdif = 0;
4771     int i, j;
4772     MpegEncContext best_s, backup_s;
4773     uint8_t bit_buf[2][MAX_MB_BYTES];
4774     uint8_t bit_buf2[2][MAX_MB_BYTES];
4775     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4776     PutBitContext pb[2], pb2[2], tex_pb[2];
4777 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4778
4779     for(i=0; i<2; i++){
4780         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4781         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4782         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4783     }
4784
4785     s->last_bits= put_bits_count(&s->pb);
4786     s->mv_bits=0;
4787     s->misc_bits=0;
4788     s->i_tex_bits=0;
4789     s->p_tex_bits=0;
4790     s->i_count=0;
4791     s->f_count=0;
4792     s->b_count=0;
4793     s->skip_count=0;
4794
4795     for(i=0; i<3; i++){
4796         /* init last dc values */
4797         /* note: quant matrix value (8) is implied here */
4798         s->last_dc[i] = 128 << s->intra_dc_precision;
4799
4800         s->current_picture.error[i] = 0;
4801     }
4802     s->mb_skip_run = 0;
4803     memset(s->last_mv, 0, sizeof(s->last_mv));
4804
4805     s->last_mv_dir = 0;
4806
4807     switch(s->codec_id){
4808     case CODEC_ID_H263:
4809     case CODEC_ID_H263P:
4810     case CODEC_ID_FLV1:
4811         s->gob_index = ff_h263_get_gob_height(s);
4812         break;
4813     case CODEC_ID_MPEG4:
4814         if(s->partitioned_frame)
4815             ff_mpeg4_init_partitions(s);
4816         break;
4817     }
4818
4819     s->resync_mb_x=0;
4820     s->resync_mb_y=0;
4821     s->first_slice_line = 1;
4822     s->ptr_lastgob = s->pb.buf;
4823     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4824 //    printf("row %d at %X\n", s->mb_y, (int)s);
4825         s->mb_x=0;
4826         s->mb_y= mb_y;
4827
4828         ff_set_qscale(s, s->qscale);
4829         ff_init_block_index(s);
4830
4831         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4832             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4833             int mb_type= s->mb_type[xy];
4834 //            int d;
4835             int dmin= INT_MAX;
4836             int dir;
4837
4838             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4839                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4840                 return -1;
4841             }
4842             if(s->data_partitioning){
4843                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4844                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4845                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4846                     return -1;
4847                 }
4848             }
4849
4850             s->mb_x = mb_x;
4851             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4852             ff_update_block_index(s);
4853
4854 #ifdef CONFIG_H261_ENCODER
4855             if(s->codec_id == CODEC_ID_H261){
4856                 ff_h261_reorder_mb_index(s);
4857                 xy= s->mb_y*s->mb_stride + s->mb_x;
4858                 mb_type= s->mb_type[xy];
4859             }
4860 #endif
4861
4862             /* write gob / video packet header  */
4863             if(s->rtp_mode){
4864                 int current_packet_size, is_gob_start;
4865
4866                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4867
4868                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4869
4870                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4871
4872                 switch(s->codec_id){
4873                 case CODEC_ID_H263:
4874                 case CODEC_ID_H263P:
4875                     if(!s->h263_slice_structured)
4876                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4877                     break;
4878                 case CODEC_ID_MPEG2VIDEO:
4879                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4880                 case CODEC_ID_MPEG1VIDEO:
4881                     if(s->mb_skip_run) is_gob_start=0;
4882                     break;
4883                 }
4884
4885                 if(is_gob_start){
4886                     if(s->start_mb_y != mb_y || mb_x!=0){
4887                         write_slice_end(s);
4888
4889                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4890                             ff_mpeg4_init_partitions(s);
4891                         }
4892                     }
4893
4894                     assert((put_bits_count(&s->pb)&7) == 0);
4895                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4896
4897                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4898                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4899                         int d= 100 / s->avctx->error_rate;
4900                         if(r % d == 0){
4901                             current_packet_size=0;
4902 #ifndef ALT_BITSTREAM_WRITER
4903                             s->pb.buf_ptr= s->ptr_lastgob;
4904 #endif
4905                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4906                         }
4907                     }
4908
4909                     if (s->avctx->rtp_callback){
4910                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4911                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4912                     }
4913
4914                     switch(s->codec_id){
4915                     case CODEC_ID_MPEG4:
4916                         ff_mpeg4_encode_video_packet_header(s);
4917                         ff_mpeg4_clean_buffers(s);
4918                     break;
4919                     case CODEC_ID_MPEG1VIDEO:
4920                     case CODEC_ID_MPEG2VIDEO:
4921                         ff_mpeg1_encode_slice_header(s);
4922                         ff_mpeg1_clean_buffers(s);
4923                     break;
4924                     case CODEC_ID_H263:
4925                     case CODEC_ID_H263P:
4926                         h263_encode_gob_header(s, mb_y);
4927                     break;
4928                     }
4929
4930                     if(s->flags&CODEC_FLAG_PASS1){
4931                         int bits= put_bits_count(&s->pb);
4932                         s->misc_bits+= bits - s->last_bits;
4933                         s->last_bits= bits;
4934                     }
4935
4936                     s->ptr_lastgob += current_packet_size;
4937                     s->first_slice_line=1;
4938                     s->resync_mb_x=mb_x;
4939                     s->resync_mb_y=mb_y;
4940                 }
4941             }
4942
4943             if(  (s->resync_mb_x   == s->mb_x)
4944                && s->resync_mb_y+1 == s->mb_y){
4945                 s->first_slice_line=0;
4946             }
4947
4948             s->mb_skipped=0;
4949             s->dquant=0; //only for QP_RD
4950
4951             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4952                 int next_block=0;
4953                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4954
4955                 copy_context_before_encode(&backup_s, s, -1);
4956                 backup_s.pb= s->pb;
4957                 best_s.data_partitioning= s->data_partitioning;
4958                 best_s.partitioned_frame= s->partitioned_frame;
4959                 if(s->data_partitioning){
4960                     backup_s.pb2= s->pb2;
4961                     backup_s.tex_pb= s->tex_pb;
4962                 }
4963
4964                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4965                     s->mv_dir = MV_DIR_FORWARD;
4966                     s->mv_type = MV_TYPE_16X16;
4967                     s->mb_intra= 0;
4968                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4969                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4970                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4971                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4972                 }
4973                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4974                     s->mv_dir = MV_DIR_FORWARD;
4975                     s->mv_type = MV_TYPE_FIELD;
4976                     s->mb_intra= 0;
4977                     for(i=0; i<2; i++){
4978                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4979                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4980                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4981                     }
4982                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4983                                  &dmin, &next_block, 0, 0);
4984                 }
4985                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4986                     s->mv_dir = MV_DIR_FORWARD;
4987                     s->mv_type = MV_TYPE_16X16;
4988                     s->mb_intra= 0;
4989                     s->mv[0][0][0] = 0;
4990                     s->mv[0][0][1] = 0;
4991                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4992                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4993                 }
4994                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4995                     s->mv_dir = MV_DIR_FORWARD;
4996                     s->mv_type = MV_TYPE_8X8;
4997                     s->mb_intra= 0;
4998                     for(i=0; i<4; i++){
4999                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5000                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5001                     }
5002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5003                                  &dmin, &next_block, 0, 0);
5004                 }
5005                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5006                     s->mv_dir = MV_DIR_FORWARD;
5007                     s->mv_type = MV_TYPE_16X16;
5008                     s->mb_intra= 0;
5009                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5010                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5012                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5013                 }
5014                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5015                     s->mv_dir = MV_DIR_BACKWARD;
5016                     s->mv_type = MV_TYPE_16X16;
5017                     s->mb_intra= 0;
5018                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5019                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5021                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5022                 }
5023                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5024                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5025                     s->mv_type = MV_TYPE_16X16;
5026                     s->mb_intra= 0;
5027                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5028                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5029                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5030                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5031                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5032                                  &dmin, &next_block, 0, 0);
5033                 }
5034                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5035                     int mx= s->b_direct_mv_table[xy][0];
5036                     int my= s->b_direct_mv_table[xy][1];
5037
5038                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5039                     s->mb_intra= 0;
5040                     ff_mpeg4_set_direct_mv(s, mx, my);
5041                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5042                                  &dmin, &next_block, mx, my);
5043                 }
5044                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5045                     s->mv_dir = MV_DIR_FORWARD;
5046                     s->mv_type = MV_TYPE_FIELD;
5047                     s->mb_intra= 0;
5048                     for(i=0; i<2; i++){
5049                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5050                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5051                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5052                     }
5053                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5054                                  &dmin, &next_block, 0, 0);
5055                 }
5056                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5057                     s->mv_dir = MV_DIR_BACKWARD;
5058                     s->mv_type = MV_TYPE_FIELD;
5059                     s->mb_intra= 0;
5060                     for(i=0; i<2; i++){
5061                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5062                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5063                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5064                     }
5065                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5066                                  &dmin, &next_block, 0, 0);
5067                 }
5068                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5069                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5070                     s->mv_type = MV_TYPE_FIELD;
5071                     s->mb_intra= 0;
5072                     for(dir=0; dir<2; dir++){
5073                         for(i=0; i<2; i++){
5074                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5075                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5076                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5077                         }
5078                     }
5079                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5080                                  &dmin, &next_block, 0, 0);
5081                 }
5082                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5083                     s->mv_dir = 0;
5084                     s->mv_type = MV_TYPE_16X16;
5085                     s->mb_intra= 1;
5086                     s->mv[0][0][0] = 0;
5087                     s->mv[0][0][1] = 0;
5088                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5089                                  &dmin, &next_block, 0, 0);
5090                     if(s->h263_pred || s->h263_aic){
5091                         if(best_s.mb_intra)
5092                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5093                         else
5094                             ff_clean_intra_table_entries(s); //old mode?
5095                     }
5096                 }
5097
5098                 if(s->flags & CODEC_FLAG_QP_RD){
5099                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5100                         const int last_qp= backup_s.qscale;
5101                         int dquant, dir, qp, dc[6];
5102                         DCTELEM ac[6][16];
5103                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5104
5105                         assert(backup_s.dquant == 0);
5106
5107                         //FIXME intra
5108                         s->mv_dir= best_s.mv_dir;
5109                         s->mv_type = MV_TYPE_16X16;
5110                         s->mb_intra= best_s.mb_intra;
5111                         s->mv[0][0][0] = best_s.mv[0][0][0];
5112                         s->mv[0][0][1] = best_s.mv[0][0][1];
5113                         s->mv[1][0][0] = best_s.mv[1][0][0];
5114                         s->mv[1][0][1] = best_s.mv[1][0][1];
5115
5116                         dir= s->pict_type == B_TYPE ? 2 : 1;
5117                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5118                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5119                             qp= last_qp + dquant;
5120                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5121                                 break;
5122                             backup_s.dquant= dquant;
5123                             if(s->mb_intra && s->dc_val[0]){
5124                                 for(i=0; i<6; i++){
5125                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5126                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5127                                 }
5128                             }
5129
5130                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5131                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5132                             if(best_s.qscale != qp){
5133                                 if(s->mb_intra && s->dc_val[0]){
5134                                     for(i=0; i<6; i++){
5135                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5136                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5137                                     }
5138                                 }
5139                                 if(dir > 0 && dquant==dir){
5140                                     dquant= 0;
5141                                     dir= -dir;
5142                                 }else
5143                                     break;
5144                             }
5145                         }
5146                         qp= best_s.qscale;
5147                         s->current_picture.qscale_table[xy]= qp;
5148                     }
5149                 }
5150
5151                 copy_context_after_encode(s, &best_s, -1);
5152
5153                 pb_bits_count= put_bits_count(&s->pb);
5154                 flush_put_bits(&s->pb);
5155                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5156                 s->pb= backup_s.pb;
5157
5158                 if(s->data_partitioning){
5159                     pb2_bits_count= put_bits_count(&s->pb2);
5160                     flush_put_bits(&s->pb2);
5161                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5162                     s->pb2= backup_s.pb2;
5163
5164                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5165                     flush_put_bits(&s->tex_pb);
5166                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5167                     s->tex_pb= backup_s.tex_pb;
5168                 }
5169                 s->last_bits= put_bits_count(&s->pb);
5170
5171                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5172                     ff_h263_update_motion_val(s);
5173
5174                 if(next_block==0){ //FIXME 16 vs linesize16
5175                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5176                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5177                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5178                 }
5179
5180                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5181                     MPV_decode_mb(s, s->block);
5182             } else {
5183                 int motion_x, motion_y;
5184                 s->mv_type=MV_TYPE_16X16;
5185                 // only one MB-Type possible
5186
5187                 switch(mb_type){
5188                 case CANDIDATE_MB_TYPE_INTRA:
5189                     s->mv_dir = 0;
5190                     s->mb_intra= 1;
5191                     motion_x= s->mv[0][0][0] = 0;
5192                     motion_y= s->mv[0][0][1] = 0;
5193                     break;
5194                 case CANDIDATE_MB_TYPE_INTER:
5195                     s->mv_dir = MV_DIR_FORWARD;
5196                     s->mb_intra= 0;
5197                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5198                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5199                     break;
5200                 case CANDIDATE_MB_TYPE_INTER_I:
5201                     s->mv_dir = MV_DIR_FORWARD;
5202                     s->mv_type = MV_TYPE_FIELD;
5203                     s->mb_intra= 0;
5204                     for(i=0; i<2; i++){
5205                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5206                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5207                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5208                     }
5209                     motion_x = motion_y = 0;
5210                     break;
5211                 case CANDIDATE_MB_TYPE_INTER4V:
5212                     s->mv_dir = MV_DIR_FORWARD;
5213                     s->mv_type = MV_TYPE_8X8;
5214                     s->mb_intra= 0;
5215                     for(i=0; i<4; i++){
5216                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5217                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5218                     }
5219                     motion_x= motion_y= 0;
5220                     break;
5221                 case CANDIDATE_MB_TYPE_DIRECT:
5222                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5223                     s->mb_intra= 0;
5224                     motion_x=s->b_direct_mv_table[xy][0];
5225                     motion_y=s->b_direct_mv_table[xy][1];
5226                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5227                     break;
5228                 case CANDIDATE_MB_TYPE_BIDIR:
5229                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5230                     s->mb_intra= 0;
5231                     motion_x=0;
5232                     motion_y=0;
5233                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5234                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5235                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5236                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5237                     break;
5238                 case CANDIDATE_MB_TYPE_BACKWARD:
5239                     s->mv_dir = MV_DIR_BACKWARD;
5240                     s->mb_intra= 0;
5241                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5242                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5243                     break;
5244                 case CANDIDATE_MB_TYPE_FORWARD:
5245                     s->mv_dir = MV_DIR_FORWARD;
5246                     s->mb_intra= 0;
5247                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5248                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5249 //                    printf(" %d %d ", motion_x, motion_y);
5250                     break;
5251                 case CANDIDATE_MB_TYPE_FORWARD_I:
5252                     s->mv_dir = MV_DIR_FORWARD;
5253                     s->mv_type = MV_TYPE_FIELD;
5254                     s->mb_intra= 0;
5255                     for(i=0; i<2; i++){
5256                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5257                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5258                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5259                     }
5260                     motion_x=motion_y=0;
5261                     break;
5262                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5263                     s->mv_dir = MV_DIR_BACKWARD;
5264                     s->mv_type = MV_TYPE_FIELD;
5265                     s->mb_intra= 0;
5266                     for(i=0; i<2; i++){
5267                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5268                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5269                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5270                     }
5271                     motion_x=motion_y=0;
5272                     break;
5273                 case CANDIDATE_MB_TYPE_BIDIR_I:
5274                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5275                     s->mv_type = MV_TYPE_FIELD;
5276                     s->mb_intra= 0;
5277                     for(dir=0; dir<2; dir++){
5278                         for(i=0; i<2; i++){
5279                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5280                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5281                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5282                         }
5283                     }
5284                     motion_x=motion_y=0;
5285                     break;
5286                 default:
5287                     motion_x=motion_y=0; //gcc warning fix
5288                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5289                 }
5290
5291                 encode_mb(s, motion_x, motion_y);
5292
5293                 // RAL: Update last macroblock type
5294                 s->last_mv_dir = s->mv_dir;
5295
5296                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5297                     ff_h263_update_motion_val(s);
5298
5299                 MPV_decode_mb(s, s->block);
5300             }
5301
5302             /* clean the MV table in IPS frames for direct mode in B frames */
5303             if(s->mb_intra /* && I,P,S_TYPE */){
5304                 s->p_mv_table[xy][0]=0;
5305                 s->p_mv_table[xy][1]=0;
5306             }
5307
5308             if(s->flags&CODEC_FLAG_PSNR){
5309                 int w= 16;
5310                 int h= 16;
5311
5312                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5313                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5314
5315                 s->current_picture.error[0] += sse(
5316                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5317                     s->dest[0], w, h, s->linesize);
5318                 s->current_picture.error[1] += sse(
5319                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5320                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5321                 s->current_picture.error[2] += sse(
5322                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5323                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5324             }
5325             if(s->loop_filter){
5326                 if(s->out_format == FMT_H263)
5327                     ff_h263_loop_filter(s);
5328             }
5329 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5330         }
5331     }
5332
5333     //not beautiful here but we must write it before flushing so it has to be here
5334     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5335         msmpeg4_encode_ext_header(s);
5336
5337     write_slice_end(s);
5338
5339     /* Send the last GOB if RTP */
5340     if (s->avctx->rtp_callback) {
5341         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5342         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5343         /* Call the RTP callback to send the last GOB */
5344         emms_c();
5345         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5346     }
5347
5348     return 0;
5349 }
5350
/* Add src->field to dst->field and clear src->field afterwards; used to fold
 * per-thread (slice context) statistics back into the main context.
 * Wrapped in do { } while(0) so the macro expands to a single statement and
 * stays safe inside unbraced if/for bodies (the bare two-statement form would
 * silently split there). */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while(0)
5352 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5353     MERGE(me.scene_change_score);
5354     MERGE(me.mc_mb_var_sum_temp);
5355     MERGE(me.mb_var_sum_temp);
5356 }
5357
5358 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5359     int i;
5360
5361     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5362     MERGE(dct_count[1]);
5363     MERGE(mv_bits);
5364     MERGE(i_tex_bits);
5365     MERGE(p_tex_bits);
5366     MERGE(i_count);
5367     MERGE(f_count);
5368     MERGE(b_count);
5369     MERGE(skip_count);
5370     MERGE(misc_bits);
5371     MERGE(error_count);
5372     MERGE(padding_bug_score);
5373     MERGE(current_picture.error[0]);
5374     MERGE(current_picture.error[1]);
5375     MERGE(current_picture.error[2]);
5376
5377     if(dst->avctx->noise_reduction){
5378         for(i=0; i<64; i++){
5379             MERGE(dct_error_sum[0][i]);
5380             MERGE(dct_error_sum[1][i]);
5381         }
5382     }
5383
5384     assert(put_bits_count(&src->pb) % 8 ==0);
5385     assert(put_bits_count(&dst->pb) % 8 ==0);
5386     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5387     flush_put_bits(&dst->pb);
5388 }
5389
5390 static void estimate_qp(MpegEncContext *s, int dry_run){
5391     if (!s->fixed_qscale)
5392         s->current_picture_ptr->quality=
5393         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5394
5395     if(s->adaptive_quant){
5396         switch(s->codec_id){
5397         case CODEC_ID_MPEG4:
5398             ff_clean_mpeg4_qscales(s);
5399             break;
5400         case CODEC_ID_H263:
5401         case CODEC_ID_H263P:
5402         case CODEC_ID_FLV1:
5403             ff_clean_h263_qscales(s);
5404             break;
5405         }
5406
5407         s->lambda= s->lambda_table[0];
5408         //FIXME broken
5409     }else
5410         s->lambda= s->current_picture.quality;
5411 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5412     update_qscale(s);
5413 }
5414
/**
 * Encode one complete picture into s->pb.
 *
 * Sequence: reset per-picture statistics, run (threaded) motion estimation,
 * detect scene changes, pick the global MV ranges (f_code/b_code) and clip
 * overlong vectors, estimate the quantizer, write the format-specific
 * picture header, then run the per-slice encode threads and merge their
 * output bitstreams.
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* no_rounding: reset on intra pictures, toggled on non-B pictures for
       codecs with flip-flop rounding.  NOTE(review): the rounding itself is
       applied by motion compensation code outside this function. */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* 2nd pass: quality comes from the stats file; otherwise (when no fixed
       qscale was requested) start from the lambda of the last picture of the
       same kind until the real rate control estimate below */
    if(s->flags & CODEC_FLAG_PASS2){
        estimate_qp(s, 1);
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* propagate the updated state into the slice-thread contexts */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        /* scale lambda by the user's ME penalty compensation (rounded) */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* fold the per-thread ME statistics back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change detected on a P frame -> encode it as an I frame instead */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the MV tables and clip vectors that exceed
       the chosen range (skipped for H.263+ unrestricted MV mode) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward range from both pure-forward and bidir candidates */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            /* backward range likewise */
            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    estimate_qp(s, 0);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the format-specific picture header and record its cost */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* run the slice encoders in parallel and concatenate their bitstreams */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5627
5628 #endif //CONFIG_ENCODERS
5629
5630 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5631     const int intra= s->mb_intra;
5632     int i;
5633
5634     s->dct_count[intra]++;
5635
5636     for(i=0; i<64; i++){
5637         int level= block[i];
5638
5639         if(level){
5640             if(level>0){
5641                 s->dct_error_sum[intra][i] += level;
5642                 level -= s->dct_offset[intra][i];
5643                 if(level<0) level=0;
5644             }else{
5645                 s->dct_error_sum[intra][i] -= level;
5646                 level += s->dct_offset[intra][i];
5647                 if(level>0) level=0;
5648             }
5649             block[i]= level;
5650         }
5651     }
5652 }
5653
5654 #ifdef CONFIG_ENCODERS
5655
/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 *
 * Forward-transforms the block, then runs a dynamic program over the
 * coefficient positions in scan order: for each position it tries the two
 * nearest quantized levels (or the sign as +-1 below threshold), scoring
 * distortion plus VLC bit cost * lambda, and keeps a pruned list of
 * "survivor" positions a run may start from.  Finally the best chain is
 * traced back and written into block[].
 *
 * @param n        block index; n < 4 means luma (y_dc_scale), else chroma
 * @param qscale   quantizer scale used to select the quant matrices
 * @param overflow set to nonzero if some coefficient exceeds s->max_qcoeff
 * @return index of the last non-zero coefficient, or -1 if the block is empty
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];   // score_tab[i]: best cost of coding coefficients before position i
    int survivor[65];    // positions a new run may start from (pruned candidates)
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];    // the (up to) 2 candidate quantized levels per position
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* intra blocks quantize DC separately and use the intra matrix/VLC
       tables; inter blocks start at coefficient 0 with the inter tables */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last coefficient that survives the quantization threshold */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate level list: the nearest quantized level and the
       one below it; below-threshold positions only keep their sign */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* the dynamic program proper: for every position and candidate level,
       try all survivor run starts and keep the cheapest transition */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* reconstruct the dequantized value the decoder would see, so
               distortion is measured against the actual reconstruction */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits the VLC tables (-64..63 after the +64 bias) */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    /* also track the best "this is the last coefficient"
                       choice using the last-coefficient VLC table */
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level outside the VLC range: must use the escape code */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* non-H.263 formats: pick the best truncation point afterwards instead
       of tracking it with a separate last-coefficient table */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: only coefficient 0 remains in an inter block -- decide
       between keeping it and dropping the block entirely */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* trace the winning chain backwards and write the levels into block[]
       (using the permutated scantable, since block is in permuted order) */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5943
//#define REFINE_STATS 1
/* 8x8 DCT basis functions, scaled by 1<<BASIS_SHIFT and indexed by the
   idct-permuted coefficient index; lazily initialized by build_basis()
   (basis[0][0]==0 is used as the "not yet built" marker). */
static int16_t basis[64][64];
5946
5947 static void build_basis(uint8_t *perm){
5948     int i, j, x, y;
5949     emms_c();
5950     for(i=0; i<8; i++){
5951         for(j=0; j<8; j++){
5952             for(y=0; y<8; y++){
5953                 for(x=0; x<8; x++){
5954                     double s= 0.25*(1<<BASIS_SHIFT);
5955                     int index= 8*i + j;
5956                     int perm_index= perm[index];
5957                     if(i==0) s*= sqrt(0.5);
5958                     if(j==0) s*= sqrt(0.5);
5959                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5960                 }
5961             }
5962         }
5963     }
5964 }
5965
5966 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5967                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5968                         int n, int qscale){
5969     int16_t rem[64];
5970     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
5971     const int *qmat;
5972     const uint8_t *scantable= s->intra_scantable.scantable;
5973     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5974 //    unsigned int threshold1, threshold2;
5975 //    int bias=0;
5976     int run_tab[65];
5977     int prev_run=0;
5978     int prev_level=0;
5979     int qmul, qadd, start_i, last_non_zero, i, dc;
5980     uint8_t * length;
5981     uint8_t * last_length;
5982     int lambda;
5983     int rle_index, run, q, sum;
5984 #ifdef REFINE_STATS
5985 static int count=0;
5986 static int after_last=0;
5987 static int to_zero=0;
5988 static int from_zero=0;
5989 static int raise=0;
5990 static int lower=0;
5991 static int messed_sign=0;
5992 #endif
5993
5994     if(basis[0][0] == 0)
5995         build_basis(s->dsp.idct_permutation);
5996
5997     qmul= qscale*2;
5998     qadd= (qscale-1)|1;
5999     if (s->mb_intra) {
6000         if (!s->h263_aic) {
6001             if (n < 4)
6002                 q = s->y_dc_scale;
6003             else
6004                 q = s->c_dc_scale;
6005         } else{
6006             /* For AIC we skip quant/dequant of INTRADC */
6007             q = 1;
6008             qadd=0;
6009         }
6010         q <<= RECON_SHIFT-3;
6011         /* note: block[0] is assumed to be positive */
6012         dc= block[0]*q;
6013 //        block[0] = (block[0] + (q >> 1)) / q;
6014         start_i = 1;
6015         qmat = s->q_intra_matrix[qscale];
6016 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6017 //            bias= 1<<(QMAT_SHIFT-1);
6018         length     = s->intra_ac_vlc_length;
6019         last_length= s->intra_ac_vlc_last_length;
6020     } else {
6021         dc= 0;
6022         start_i = 0;
6023         qmat = s->q_inter_matrix[qscale];
6024         length     = s->inter_ac_vlc_length;
6025         last_length= s->inter_ac_vlc_last_length;
6026     }
6027     last_non_zero = s->block_last_index[n];
6028
6029 #ifdef REFINE_STATS
6030 {START_TIMER
6031 #endif
6032     dc += (1<<(RECON_SHIFT-1));
6033     for(i=0; i<64; i++){
6034         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
6035     }
6036 #ifdef REFINE_STATS
6037 STOP_TIMER("memset rem[]")}
6038 #endif
6039     sum=0;
6040     for(i=0; i<64; i++){
6041         int one= 36;
6042         int qns=4;
6043         int w;
6044
6045         w= ABS(weight[i]) + qns*one;
6046         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6047
6048         weight[i] = w;
6049 //        w=weight[i] = (63*qns + (w/2)) / w;
6050
6051         assert(w>0);
6052         assert(w<(1<<6));
6053         sum += w*w;
6054     }
6055     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6056 #ifdef REFINE_STATS
6057 {START_TIMER
6058 #endif
6059     run=0;
6060     rle_index=0;
6061     for(i=start_i; i<=last_non_zero; i++){
6062         int j= perm_scantable[i];
6063         const int level= block[j];
6064         int coeff;
6065
6066         if(level){
6067             if(level<0) coeff= qmul*level - qadd;
6068             else        coeff= qmul*level + qadd;
6069             run_tab[rle_index++]=run;
6070             run=0;
6071
6072             s->dsp.add_8x8basis(rem, basis[j], coeff);
6073         }else{
6074             run++;
6075         }
6076     }
6077 #ifdef REFINE_STATS
6078 if(last_non_zero>0){
6079 STOP_TIMER("init rem[]")
6080 }
6081 }
6082
6083 {START_TIMER
6084 #endif
6085     for(;;){
6086         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6087         int best_coeff=0;
6088         int best_change=0;
6089         int run2, best_unquant_change=0, analyze_gradient;
6090 #ifdef REFINE_STATS
6091 {START_TIMER
6092 #endif
6093         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6094
6095         if(analyze_gradient){
6096 #ifdef REFINE_STATS
6097 {START_TIMER
6098 #endif
6099             for(i=0; i<64; i++){
6100                 int w= weight[i];
6101
6102                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6103             }
6104 #ifdef REFINE_STATS
6105 STOP_TIMER("rem*w*w")}
6106 {START_TIMER
6107 #endif
6108             s->dsp.fdct(d1);
6109 #ifdef REFINE_STATS
6110 STOP_TIMER("dct")}
6111 #endif
6112         }
6113
6114         if(start_i){
6115             const int level= block[0];
6116             int change, old_coeff;
6117
6118             assert(s->mb_intra);
6119
6120             old_coeff= q*level;
6121
6122             for(change=-1; change<=1; change+=2){
6123                 int new_level= level + change;
6124                 int score, new_coeff;
6125
6126                 new_coeff= q*new_level;
6127                 if(new_coeff >= 2048 || new_coeff < 0)
6128                     continue;
6129
6130                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6131                 if(score<best_score){
6132                     best_score= score;
6133                     best_coeff= 0;
6134                     best_change= change;
6135                     best_unquant_change= new_coeff - old_coeff;
6136                 }
6137             }
6138         }
6139
6140         run=0;
6141         rle_index=0;
6142         run2= run_tab[rle_index++];
6143         prev_level=0;
6144         prev_run=0;
6145
6146         for(i=start_i; i<64; i++){
6147             int j= perm_scantable[i];
6148             const int level= block[j];
6149             int change, old_coeff;
6150
6151             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6152                 break;
6153
6154             if(level){
6155                 if(level<0) old_coeff= qmul*level - qadd;
6156                 else        old_coeff= qmul*level + qadd;
6157                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6158             }else{
6159                 old_coeff=0;
6160                 run2--;
6161                 assert(run2>=0 || i >= last_non_zero );
6162             }
6163
6164             for(change=-1; change<=1; change+=2){
6165                 int new_level= level + change;
6166                 int score, new_coeff, unquant_change;
6167
6168                 score=0;
6169                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6170                    continue;
6171
6172                 if(new_level){
6173                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6174                     else            new_coeff= qmul*new_level + qadd;
6175                     if(new_coeff >= 2048 || new_coeff <= -2048)
6176                         continue;
6177                     //FIXME check for overflow
6178
6179                     if(level){
6180                         if(level < 63 && level > -63){
6181                             if(i < last_non_zero)
6182                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6183                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6184                             else
6185                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6186                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6187                         }
6188                     }else{
6189                         assert(ABS(new_level)==1);
6190
6191                         if(analyze_gradient){
6192                             int g= d1[ scantable[i] ];
6193                             if(g && (g^new_level) >= 0)
6194                                 continue;
6195                         }
6196
6197                         if(i < last_non_zero){
6198                             int next_i= i + run2 + 1;
6199                             int next_level= block[ perm_scantable[next_i] ] + 64;
6200
6201                             if(next_level&(~127))
6202                                 next_level= 0;
6203
6204                             if(next_i < last_non_zero)
6205                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6206                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6207                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6208                             else
6209                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6210                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6211                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6212                         }else{
6213                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6214                             if(prev_level){
6215                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6216                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6217                             }
6218                         }
6219                     }
6220                 }else{
6221                     new_coeff=0;
6222                     assert(ABS(level)==1);
6223
6224                     if(i < last_non_zero){
6225                         int next_i= i + run2 + 1;
6226                         int next_level= block[ perm_scantable[next_i] ] + 64;
6227
6228                         if(next_level&(~127))
6229                             next_level= 0;
6230
6231                         if(next_i < last_non_zero)
6232                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6233                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6234                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6235                         else
6236                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6237                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6238                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6239                     }else{
6240                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6241                         if(prev_level){
6242                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6243                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6244                         }
6245                     }
6246                 }
6247
6248                 score *= lambda;
6249
6250                 unquant_change= new_coeff - old_coeff;
6251                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6252
6253                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6254                 if(score<best_score){
6255                     best_score= score;
6256                     best_coeff= i;
6257                     best_change= change;
6258                     best_unquant_change= unquant_change;
6259                 }
6260             }
6261             if(level){
6262                 prev_level= level + 64;
6263                 if(prev_level&(~127))
6264                     prev_level= 0;
6265                 prev_run= run;
6266                 run=0;
6267             }else{
6268                 run++;
6269             }
6270         }
6271 #ifdef REFINE_STATS
6272 STOP_TIMER("iterative step")}
6273 #endif
6274
6275         if(best_change){
6276             int j= perm_scantable[ best_coeff ];
6277
6278             block[j] += best_change;
6279
6280             if(best_coeff > last_non_zero){
6281                 last_non_zero= best_coeff;
6282                 assert(block[j]);
6283 #ifdef REFINE_STATS
6284 after_last++;
6285 #endif
6286             }else{
6287 #ifdef REFINE_STATS
6288 if(block[j]){
6289     if(block[j] - best_change){
6290         if(ABS(block[j]) > ABS(block[j] - best_change)){
6291             raise++;
6292         }else{
6293             lower++;
6294         }
6295     }else{
6296         from_zero++;
6297     }
6298 }else{
6299     to_zero++;
6300 }
6301 #endif
6302                 for(; last_non_zero>=start_i; last_non_zero--){
6303                     if(block[perm_scantable[last_non_zero]])
6304                         break;
6305                 }
6306             }
6307 #ifdef REFINE_STATS
6308 count++;
6309 if(256*256*256*64 % count == 0){
6310     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6311 }
6312 #endif
6313             run=0;
6314             rle_index=0;
6315             for(i=start_i; i<=last_non_zero; i++){
6316                 int j= perm_scantable[i];
6317                 const int level= block[j];
6318
6319                  if(level){
6320                      run_tab[rle_index++]=run;
6321                      run=0;
6322                  }else{
6323                      run++;
6324                  }
6325             }
6326
6327             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6328         }else{
6329             break;
6330         }
6331     }
6332 #ifdef REFINE_STATS
6333 if(last_non_zero>0){
6334 STOP_TIMER("iterative search")
6335 }
6336 }
6337 #endif
6338
6339     return last_non_zero;
6340 }
6341
6342 static int dct_quantize_c(MpegEncContext *s,
6343                         DCTELEM *block, int n,
6344                         int qscale, int *overflow)
6345 {
6346     int i, j, level, last_non_zero, q, start_i;
6347     const int *qmat;
6348     const uint8_t *scantable= s->intra_scantable.scantable;
6349     int bias;
6350     int max=0;
6351     unsigned int threshold1, threshold2;
6352
6353     s->dsp.fdct (block);
6354
6355     if(s->dct_error_sum)
6356         s->denoise_dct(s, block);
6357
6358     if (s->mb_intra) {
6359         if (!s->h263_aic) {
6360             if (n < 4)
6361                 q = s->y_dc_scale;
6362             else
6363                 q = s->c_dc_scale;
6364             q = q << 3;
6365         } else
6366             /* For AIC we skip quant/dequant of INTRADC */
6367             q = 1 << 3;
6368
6369         /* note: block[0] is assumed to be positive */
6370         block[0] = (block[0] + (q >> 1)) / q;
6371         start_i = 1;
6372         last_non_zero = 0;
6373         qmat = s->q_intra_matrix[qscale];
6374         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6375     } else {
6376         start_i = 0;
6377         last_non_zero = -1;
6378         qmat = s->q_inter_matrix[qscale];
6379         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6380     }
6381     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6382     threshold2= (threshold1<<1);
6383     for(i=63;i>=start_i;i--) {
6384         j = scantable[i];
6385         level = block[j] * qmat[j];
6386
6387         if(((unsigned)(level+threshold1))>threshold2){
6388             last_non_zero = i;
6389             break;
6390         }else{
6391             block[j]=0;
6392         }
6393     }
6394     for(i=start_i; i<=last_non_zero; i++) {
6395         j = scantable[i];
6396         level = block[j] * qmat[j];
6397
6398 //        if(   bias+level >= (1<<QMAT_SHIFT)
6399 //           || bias-level >= (1<<QMAT_SHIFT)){
6400         if(((unsigned)(level+threshold1))>threshold2){
6401             if(level>0){
6402                 level= (bias + level)>>QMAT_SHIFT;
6403                 block[j]= level;
6404             }else{
6405                 level= (bias - level)>>QMAT_SHIFT;
6406                 block[j]= -level;
6407             }
6408             max |=level;
6409         }else{
6410             block[j]=0;
6411         }
6412     }
6413     *overflow= s->max_qcoeff < max; //overflow might have happened
6414
6415     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6416     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6417         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6418
6419     return last_non_zero;
6420 }
6421
6422 #endif //CONFIG_ENCODERS
6423
6424 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6425                                    DCTELEM *block, int n, int qscale)
6426 {
6427     int i, level, nCoeffs;
6428     const uint16_t *quant_matrix;
6429
6430     nCoeffs= s->block_last_index[n];
6431
6432     if (n < 4)
6433         block[0] = block[0] * s->y_dc_scale;
6434     else
6435         block[0] = block[0] * s->c_dc_scale;
6436     /* XXX: only mpeg1 */
6437     quant_matrix = s->intra_matrix;
6438     for(i=1;i<=nCoeffs;i++) {
6439         int j= s->intra_scantable.permutated[i];
6440         level = block[j];
6441         if (level) {
6442             if (level < 0) {
6443                 level = -level;
6444                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6445                 level = (level - 1) | 1;
6446                 level = -level;
6447             } else {
6448                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6449                 level = (level - 1) | 1;
6450             }
6451             block[j] = level;
6452         }
6453     }
6454 }
6455
6456 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6457                                    DCTELEM *block, int n, int qscale)
6458 {
6459     int i, level, nCoeffs;
6460     const uint16_t *quant_matrix;
6461
6462     nCoeffs= s->block_last_index[n];
6463
6464     quant_matrix = s->inter_matrix;
6465     for(i=0; i<=nCoeffs; i++) {
6466         int j= s->intra_scantable.permutated[i];
6467         level = block[j];
6468         if (level) {
6469             if (level < 0) {
6470                 level = -level;
6471                 level = (((level << 1) + 1) * qscale *
6472                          ((int) (quant_matrix[j]))) >> 4;
6473                 level = (level - 1) | 1;
6474                 level = -level;
6475             } else {
6476                 level = (((level << 1) + 1) * qscale *
6477                          ((int) (quant_matrix[j]))) >> 4;
6478                 level = (level - 1) | 1;
6479             }
6480             block[j] = level;
6481         }
6482     }
6483 }
6484
6485 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6486                                    DCTELEM *block, int n, int qscale)
6487 {
6488     int i, level, nCoeffs;
6489     const uint16_t *quant_matrix;
6490
6491     if(s->alternate_scan) nCoeffs= 63;
6492     else nCoeffs= s->block_last_index[n];
6493
6494     if (n < 4)
6495         block[0] = block[0] * s->y_dc_scale;
6496     else
6497         block[0] = block[0] * s->c_dc_scale;
6498     quant_matrix = s->intra_matrix;
6499     for(i=1;i<=nCoeffs;i++) {
6500         int j= s->intra_scantable.permutated[i];
6501         level = block[j];
6502         if (level) {
6503             if (level < 0) {
6504                 level = -level;
6505                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6506                 level = -level;
6507             } else {
6508                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6509             }
6510             block[j] = level;
6511         }
6512     }
6513 }
6514
6515 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6516                                    DCTELEM *block, int n, int qscale)
6517 {
6518     int i, level, nCoeffs;
6519     const uint16_t *quant_matrix;
6520     int sum=-1;
6521
6522     if(s->alternate_scan) nCoeffs= 63;
6523     else nCoeffs= s->block_last_index[n];
6524
6525     quant_matrix = s->inter_matrix;
6526     for(i=0; i<=nCoeffs; i++) {
6527         int j= s->intra_scantable.permutated[i];
6528         level = block[j];
6529         if (level) {
6530             if (level < 0) {
6531                 level = -level;
6532                 level = (((level << 1) + 1) * qscale *
6533                          ((int) (quant_matrix[j]))) >> 4;
6534                 level = -level;
6535             } else {
6536                 level = (((level << 1) + 1) * qscale *
6537                          ((int) (quant_matrix[j]))) >> 4;
6538             }
6539             block[j] = level;
6540             sum+=level;
6541         }
6542     }
6543     block[63]^=sum&1;
6544 }
6545
6546 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6547                                   DCTELEM *block, int n, int qscale)
6548 {
6549     int i, level, qmul, qadd;
6550     int nCoeffs;
6551
6552     assert(s->block_last_index[n]>=0);
6553
6554     qmul = qscale << 1;
6555
6556     if (!s->h263_aic) {
6557         if (n < 4)
6558             block[0] = block[0] * s->y_dc_scale;
6559         else
6560             block[0] = block[0] * s->c_dc_scale;
6561         qadd = (qscale - 1) | 1;
6562     }else{
6563         qadd = 0;
6564     }
6565     if(s->ac_pred)
6566         nCoeffs=63;
6567     else
6568         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6569
6570     for(i=1; i<=nCoeffs; i++) {
6571         level = block[i];
6572         if (level) {
6573             if (level < 0) {
6574                 level = level * qmul - qadd;
6575             } else {
6576                 level = level * qmul + qadd;
6577             }
6578             block[i] = level;
6579         }
6580     }
6581 }
6582
6583 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6584                                   DCTELEM *block, int n, int qscale)
6585 {
6586     int i, level, qmul, qadd;
6587     int nCoeffs;
6588
6589     assert(s->block_last_index[n]>=0);
6590
6591     qadd = (qscale - 1) | 1;
6592     qmul = qscale << 1;
6593
6594     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6595
6596     for(i=0; i<=nCoeffs; i++) {
6597         level = block[i];
6598         if (level) {
6599             if (level < 0) {
6600                 level = level * qmul - qadd;
6601             } else {
6602                 level = level * qmul + qadd;
6603             }
6604             block[i] = level;
6605         }
6606     }
6607 }
6608
#ifdef CONFIG_ENCODERS
/*
 * Encoder registrations. Each AVCodec below wires the shared MPV_encode_init /
 * MPV_encode_picture / MPV_encode_end entry points to one bitstream flavor;
 * the remaining fields are positional initializers whose meaning follows the
 * AVCodec declaration in avcodec.h (NOTE(review): presumably name, type, id,
 * priv_data_size, init, encode, close — confirm against avcodec.h).
 * All encoders accept 4:2:0 planar input; mjpeg uses the full-range variant.
 */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* mpeg4 is the only one here flagged CODEC_CAP_DELAY (B-frames delay output) */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* note: registered under the short name "msmpeg4" */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* mjpeg takes full-range (JPEG) 4:2:0 rather than PIX_FMT_YUV420P */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};

#endif //CONFIG_ENCODERS