1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "faandct.h"
34 #include <limits.h>
35
36 #ifdef USE_FASTMEMCPY
37 #include "libvo/fastmemcpy.h"
38 #endif
39
40 //#undef NDEBUG
41 //#include <assert.h>
42
43 #ifdef CONFIG_ENCODERS
44 static int encode_picture(MpegEncContext *s, int picture_number);
45 #endif //CONFIG_ENCODERS
46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
84 /* for jpeg fast DCT */
85 #define CONST_BITS 14
86
87 static const uint16_t aanscales[64] = {
88     /* precomputed values scaled up by 14 bits */
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
97 };
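/* For reference, these appear to be the AAN (Arai/Agui/Nakajima) DCT post-scale
 * factors in Q14: aanscales[8*u + v] ~= round(16384 * f(u) * f(v)), with
 * f(0) = 1 and f(k) = sqrt(2)*cos(k*PI/16) otherwise; e.g. f(1) ~= 1.3870,
 * and 16384 * 1.3870 ~= 22725 (row 0, column 1). */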
98
99 static const uint8_t h263_chroma_roundtab[16] = {
100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
102 };
103
104 static const uint8_t ff_default_chroma_qscale_table[32]={
105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
107 };
108
109 #ifdef CONFIG_ENCODERS
110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
117 {
118     int qscale;
119     int shift=0;
120
121     for(qscale=qmin; qscale<=qmax; qscale++){
122         int i;
123         if (dsp->fdct == ff_jpeg_fdct_islow
124 #ifdef FAAN_POSTSCALE
125             || dsp->fdct == ff_faandct
126 #endif
127             ) {
128             for(i=0;i<64;i++) {
129                 const int j= dsp->idct_permutation[i];
130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
134
135                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
136                                 (qscale * quant_matrix[j]));
137             }
138         } else if (dsp->fdct == fdct_ifast
139 #ifndef FAAN_POSTSCALE
140                    || dsp->fdct == ff_faandct
141 #endif
142                    ) {
143             for(i=0;i<64;i++) {
144                 const int j= dsp->idct_permutation[i];
145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
149
150                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
151                                 (aanscales[i] * qscale * quant_matrix[j]));
152             }
153         } else {
154             for(i=0;i<64;i++) {
155                 const int j= dsp->idct_permutation[i];
156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
157                    So 16           <= qscale * quant_matrix[i]             <= 7905
158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
160                 */
161                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
164
165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
167             }
168         }
169
170         for(i=intra; i<64; i++){
171             int64_t max= 8191;
172             if (dsp->fdct == fdct_ifast
173 #ifndef FAAN_POSTSCALE
174                    || dsp->fdct == ff_faandct
175 #endif
176                    ) {
177                 max= (8191LL*aanscales[i]) >> 14;
178             }
179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
180                 shift++;
181             }
182         }
183     }
184     if(shift){
185         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
186     }
187 }
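/* Rough sketch of what these tables buy us (QMAT_SHIFT and QMAT_SHIFT_MMX are
 * defined in mpegvideo.h): the quantizer replaces a division by
 * qscale*quant_matrix[i] with a multiply and a shift, i.e. roughly
 *
 *     level ~= (coeff * qmat[qscale][i]) >> QMAT_SHIFT
 *           ~=  coeff / (qscale * quant_matrix[i])
 *
 * e.g. with qscale == 2 and quant_matrix[j] == 16, qmat == (1<<QMAT_SHIFT)/32,
 * so the multiply-and-shift approximates a division by 32. */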
188
189 static inline void update_qscale(MpegEncContext *s){
190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
191     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
192
193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
194 }
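/* Back-of-the-envelope check of the constant above, assuming the libavutil
 * values FF_LAMBDA_SHIFT == 7 and FF_LAMBDA_SCALE == 128: the expression is
 * (lambda*139 + 8192) >> 14, i.e. qscale ~= lambda/117.9 rounded, which
 * inverts the usual lambda ~= qscale * FF_QP2LAMBDA (= 118) mapping;
 * e.g. lambda == 1180 gives (164020 + 8192) >> 14 == 10. */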
195 #endif //CONFIG_ENCODERS
196
197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
198     int i;
199     int end;
200
201     st->scantable= src_scantable;
202
203     for(i=0; i<64; i++){
204         int j;
205         j = src_scantable[i];
206         st->permutated[i] = permutation[j];
207 #ifdef ARCH_POWERPC
208         st->inverse[j] = i;
209 #endif
210     }
211
212     end=-1;
213     for(i=0; i<64; i++){
214         int j;
215         j = st->permutated[i];
216         if(j>end) end=j;
217         st->raster_end[i]= end;
218     }
219 }
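/* Illustrative sketch (hypothetical caller, identity permutation), showing
 * what the generated tables contain:
 *
 *     ScanTable st;
 *     uint8_t identity[64];
 *     for (int i = 0; i < 64; i++) identity[i] = i;
 *     ff_init_scantable(identity, &st, ff_zigzag_direct);
 *     // st.permutated[i] == ff_zigzag_direct[i]
 *     // st.raster_end[i] == highest raster index among the first i+1 scan
 *     //                     positions (how far a block coded up to scan
 *     //                     position i extends in raster order)
 */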
220
221 #ifdef CONFIG_ENCODERS
222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
223     int i;
224
225     if(matrix){
226         put_bits(pb, 1, 1);
227         for(i=0;i<64;i++) {
228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
229         }
230     }else
231         put_bits(pb, 1, 0);
232 }
233 #endif //CONFIG_ENCODERS
234
235 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
236     int i;
237
238     assert(p<=end);
239     if(p>=end)
240         return end;
241
242     for(i=0; i<3; i++){
243         uint32_t tmp= *state << 8;
244         *state= tmp + *(p++);
245         if(tmp == 0x100 || p==end)
246             return p;
247     }
248
249     while(p<end){
250         if     (p[-1] > 1      ) p+= 3;
251         else if(p[-2]          ) p+= 2;
252         else if(p[-3]|(p[-1]-1)) p++;
253         else{
254             p++;
255             break;
256         }
257     }
258
259     p= FFMIN(p, end)-4;
260     *state=  be2me_32(unaligned32(p));
261
262     return p+4;
263 }
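/* A minimal usage sketch (hypothetical caller): resynchronize on MPEG-style
 * 00 00 01 xx start codes.  The returned pointer sits just past the start
 * code and *state then holds its value (0x000001xx); a value above 0x1FF
 * means no start code was found in the buffer.
 *
 *     uint32_t state = -1;
 *     const uint8_t *p = buf;
 *     while (p < buf_end) {
 *         p = ff_find_start_code(p, buf_end, &state);
 *         if (state > 0x1FF)
 *             break;                          // ran out of start codes
 *         handle_start_code(state & 0xFF, p); // hypothetical helper
 *     }
 */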
264
265 /* init common dct for both encoder and decoder */
266 int DCT_common_init(MpegEncContext *s)
267 {
268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
273     if(s->flags & CODEC_FLAG_BITEXACT)
274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
276
277 #ifdef CONFIG_ENCODERS
278     s->dct_quantize= dct_quantize_c;
279     s->denoise_dct= denoise_dct_c;
280 #endif //CONFIG_ENCODERS
281
282 #ifdef HAVE_MMX
283     MPV_common_init_mmx(s);
284 #endif
285 #ifdef ARCH_ALPHA
286     MPV_common_init_axp(s);
287 #endif
288 #ifdef HAVE_MLIB
289     MPV_common_init_mlib(s);
290 #endif
291 #ifdef HAVE_MMI
292     MPV_common_init_mmi(s);
293 #endif
294 #ifdef ARCH_ARMV4L
295     MPV_common_init_armv4l(s);
296 #endif
297 #ifdef ARCH_POWERPC
298     MPV_common_init_ppc(s);
299 #endif
300
301 #ifdef CONFIG_ENCODERS
302     s->fast_dct_quantize= s->dct_quantize;
303
304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
306     }
307
308 #endif //CONFIG_ENCODERS
309
310     /* load & permute scantables
311        note: only wmv uses different ones
312     */
313     if(s->alternate_scan){
314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
316     }else{
317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
319     }
320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
322
323     return 0;
324 }
325
326 static void copy_picture(Picture *dst, Picture *src){
327     *dst = *src;
328     dst->type= FF_BUFFER_TYPE_COPY;
329 }
330
331 #ifdef CONFIG_ENCODERS
332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
333     int i;
334
335     dst->pict_type              = src->pict_type;
336     dst->quality                = src->quality;
337     dst->coded_picture_number   = src->coded_picture_number;
338     dst->display_picture_number = src->display_picture_number;
339 //    dst->reference              = src->reference;
340     dst->pts                    = src->pts;
341     dst->interlaced_frame       = src->interlaced_frame;
342     dst->top_field_first        = src->top_field_first;
343
344     if(s->avctx->me_threshold){
345         if(!src->motion_val[0])
346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
347         if(!src->mb_type)
348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
349         if(!src->ref_index[0])
350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
353             src->motion_subsample_log2, dst->motion_subsample_log2);
354
355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
356
357         for(i=0; i<2; i++){
358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
360
361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
363             }
364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
366             }
367         }
368     }
369 }
370 #endif
371
372 /**
373  * allocates a Picture
374  * The pixels are allocated/set by calling get_buffer() if shared=0
375  */
376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
378     const int mb_array_size= s->mb_stride*s->mb_height;
379     const int b8_array_size= s->b8_stride*s->mb_height*2;
380     const int b4_array_size= s->b4_stride*s->mb_height*4;
381     int i;
382
383     if(shared){
384         assert(pic->data[0]);
385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
386         pic->type= FF_BUFFER_TYPE_SHARED;
387     }else{
388         int r;
389
390         assert(!pic->data[0]);
391
392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
393
394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
396             return -1;
397         }
398
399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
401             return -1;
402         }
403
404         if(pic->linesize[1] != pic->linesize[2]){
405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
406             return -1;
407         }
408
409         s->linesize  = pic->linesize[0];
410         s->uvlinesize= pic->linesize[1];
411     }
412
413     if(pic->qscale_table==NULL){
414         if (s->encoding) {
415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
418         }
419
420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
424         if(s->out_format == FMT_H264){
425             for(i=0; i<2; i++){
426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
429             }
430             pic->motion_subsample_log2= 2;
431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
432             for(i=0; i<2; i++){
433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
436             }
437             pic->motion_subsample_log2= 3;
438         }
439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
441         }
442         pic->qstride= s->mb_stride;
443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
444     }
445
446     //it might be nicer if the application kept track of these, but it would require an API change
447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
448     s->prev_pict_types[0]= s->pict_type;
449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
451
452     return 0;
453 fail: //for the CHECKED_ALLOCZ macro
454     return -1;
455 }
456
457 /**
458  * deallocates a picture
459  */
460 static void free_picture(MpegEncContext *s, Picture *pic){
461     int i;
462
463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
465     }
466
467     av_freep(&pic->mb_var);
468     av_freep(&pic->mc_mb_var);
469     av_freep(&pic->mb_mean);
470     av_freep(&pic->mbskip_table);
471     av_freep(&pic->qscale_table);
472     av_freep(&pic->mb_type_base);
473     av_freep(&pic->dct_coeff);
474     av_freep(&pic->pan_scan);
475     pic->mb_type= NULL;
476     for(i=0; i<2; i++){
477         av_freep(&pic->motion_val_base[i]);
478         av_freep(&pic->ref_index[i]);
479     }
480
481     if(pic->type == FF_BUFFER_TYPE_SHARED){
482         for(i=0; i<4; i++){
483             pic->base[i]=
484             pic->data[i]= NULL;
485         }
486         pic->type= 0;
487     }
488 }
489
490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
491     int i;
492
493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
496
497      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
499     s->rd_scratchpad=   s->me.scratchpad;
500     s->b_scratchpad=    s->me.scratchpad;
501     s->obmc_scratchpad= s->me.scratchpad + 16;
502     if (s->encoding) {
503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
505         if(s->avctx->noise_reduction){
506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
507         }
508     }
509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
510     s->block= s->blocks[0];
511
512     for(i=0;i<12;i++){
513         s->pblocks[i] = (short *)(&s->block[i]);
514     }
515     return 0;
516 fail:
517     return -1; //free() through MPV_common_end()
518 }
519
520 static void free_duplicate_context(MpegEncContext *s){
521     if(s==NULL) return;
522
523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
524     av_freep(&s->me.scratchpad);
525     s->rd_scratchpad=
526     s->b_scratchpad=
527     s->obmc_scratchpad= NULL;
528
529     av_freep(&s->dct_error_sum);
530     av_freep(&s->me.map);
531     av_freep(&s->me.score_map);
532     av_freep(&s->blocks);
533     s->block= NULL;
534 }
535
536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
537 #define COPY(a) bak->a= src->a
538     COPY(allocated_edge_emu_buffer);
539     COPY(edge_emu_buffer);
540     COPY(me.scratchpad);
541     COPY(rd_scratchpad);
542     COPY(b_scratchpad);
543     COPY(obmc_scratchpad);
544     COPY(me.map);
545     COPY(me.score_map);
546     COPY(blocks);
547     COPY(block);
548     COPY(start_mb_y);
549     COPY(end_mb_y);
550     COPY(me.map_generation);
551     COPY(pb);
552     COPY(dct_error_sum);
553     COPY(dct_count[0]);
554     COPY(dct_count[1]);
555 #undef COPY
556 }
557
558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
559     MpegEncContext bak;
560     int i;
561     //FIXME copy only needed parts
562 //START_TIMER
563     backup_duplicate_context(&bak, dst);
564     memcpy(dst, src, sizeof(MpegEncContext));
565     backup_duplicate_context(dst, &bak);
566     for(i=0;i<12;i++){
567         dst->pblocks[i] = (short *)(&dst->block[i]);
568     }
569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000 frames on a 1GHz CPU with 2 threads
570 }
571
572 #ifdef CONFIG_ENCODERS
573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
574 #define COPY(a) dst->a= src->a
575     COPY(pict_type);
576     COPY(current_picture);
577     COPY(f_code);
578     COPY(b_code);
579     COPY(qscale);
580     COPY(lambda);
581     COPY(lambda2);
582     COPY(picture_in_gop_number);
583     COPY(gop_picture_number);
584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
585     COPY(progressive_frame); //FIXME don't set in encode_header
586     COPY(partitioned_frame); //FIXME don't set in encode_header
587 #undef COPY
588 }
589 #endif
590
591 /**
592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
593  * the changed fields will not depend upon the prior state of the MpegEncContext.
594  */
595 static void MPV_common_defaults(MpegEncContext *s){
596     s->y_dc_scale_table=
597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
599     s->progressive_frame= 1;
600     s->progressive_sequence= 1;
601     s->picture_structure= PICT_FRAME;
602
603     s->coded_picture_number = 0;
604     s->picture_number = 0;
605     s->input_picture_number = 0;
606
607     s->picture_in_gop_number = 0;
608
609     s->f_code = 1;
610     s->b_code = 1;
611 }
612
613 /**
614  * sets the given MpegEncContext to defaults for decoding.
615  * the changed fields will not depend upon the prior state of the MpegEncContext.
616  */
617 void MPV_decode_defaults(MpegEncContext *s){
618     MPV_common_defaults(s);
619 }
620
621 /**
622  * sets the given MpegEncContext to defaults for encoding.
623  * the changed fields will not depend upon the prior state of the MpegEncContext.
624  */
625
626 #ifdef CONFIG_ENCODERS
627 static void MPV_encode_defaults(MpegEncContext *s){
628     static int done=0;
629
630     MPV_common_defaults(s);
631
632     if(!done){
633         int i;
634         done=1;
635
636         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
637         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
638
639         for(i=-16; i<16; i++){
640             default_fcode_tab[i + MAX_MV]= 1;
641         }
642     }
643     s->me.mv_penalty= default_mv_penalty;
644     s->fcode_tab= default_fcode_tab;
645 }
646 #endif //CONFIG_ENCODERS
647
648 /**
649  * init common structure for both encoder and decoder.
650  * this assumes that some variables like width/height are already set
651  */
652 int MPV_common_init(MpegEncContext *s)
653 {
654     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
655
656     s->mb_height = (s->height + 15) / 16;
657
658     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
659         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
660         return -1;
661     }
662
663     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
664         return -1;
665
666     dsputil_init(&s->dsp, s->avctx);
667     DCT_common_init(s);
668
669     s->flags= s->avctx->flags;
670     s->flags2= s->avctx->flags2;
671
672     s->mb_width  = (s->width  + 15) / 16;
673     s->mb_stride = s->mb_width + 1;
674     s->b8_stride = s->mb_width*2 + 1;
675     s->b4_stride = s->mb_width*4 + 1;
676     mb_array_size= s->mb_height * s->mb_stride;
677     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
678
679     /* set chroma shifts */
680     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
681                                                     &(s->chroma_y_shift) );
682
683     /* set default edge pos, will be overridden in decode_header if needed */
684     s->h_edge_pos= s->mb_width*16;
685     s->v_edge_pos= s->mb_height*16;
686
687     s->mb_num = s->mb_width * s->mb_height;
688
689     s->block_wrap[0]=
690     s->block_wrap[1]=
691     s->block_wrap[2]=
692     s->block_wrap[3]= s->b8_stride;
693     s->block_wrap[4]=
694     s->block_wrap[5]= s->mb_stride;
695
696     y_size = s->b8_stride * (2 * s->mb_height + 1);
697     c_size = s->mb_stride * (s->mb_height + 1);
698     yc_size = y_size + 2 * c_size;
699
700     /* convert fourcc to upper case */
701     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
702                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
703                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
704                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
705
706     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
707                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
708                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
709                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
710
711     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
712
713     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
714     for(y=0; y<s->mb_height; y++){
715         for(x=0; x<s->mb_width; x++){
716             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
717         }
718     }
719     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
720
721     if (s->encoding) {
722         /* Allocate MV tables */
723         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
724         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
725         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
726         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
727         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
728         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
729         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
730         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
731         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
732         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
733         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
734         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
735
736         if(s->msmpeg4_version){
737             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
738         }
739         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
740
741         /* Allocate MB type table */
742         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
743
744         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
745
746         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
747         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
748         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
749         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
750         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
751         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
752
753         if(s->avctx->noise_reduction){
754             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
755         }
756     }
757     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
758
759     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
760
761     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
762         /* interlaced direct mode decoding tables */
763             for(i=0; i<2; i++){
764                 int j, k;
765                 for(j=0; j<2; j++){
766                     for(k=0; k<2; k++){
767                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
768                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
769                     }
770                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
771                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
772                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
773                 }
774                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
775             }
776     }
777     if (s->out_format == FMT_H263) {
778         /* ac values */
779         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
780         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
781         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
782         s->ac_val[2] = s->ac_val[1] + c_size;
783
784         /* cbp values */
785         CHECKED_ALLOCZ(s->coded_block_base, y_size);
786         s->coded_block= s->coded_block_base + s->b8_stride + 1;
787
788         /* cbp, ac_pred, pred_dir */
789         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
790         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
791     }
792
793     if (s->h263_pred || s->h263_plus || !s->encoding) {
794         /* dc values */
795         //MN: we need these for error resilience of intra-frames
796         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
797         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
798         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
799         s->dc_val[2] = s->dc_val[1] + c_size;
800         for(i=0;i<yc_size;i++)
801             s->dc_val_base[i] = 1024;
802     }
803
804     /* which mb is an intra block */
805     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
806     memset(s->mbintra_table, 1, mb_array_size);
807
808     /* init macroblock skip table */
809     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
810     //Note the +1 is for a quicker mpeg4 slice_end detection
811     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
812
813     s->parse_context.state= -1;
814     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
815        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
816        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
817        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
818     }
819
820     s->context_initialized = 1;
821
822     s->thread_context[0]= s;
823     for(i=1; i<s->avctx->thread_count; i++){
824         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
825         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
826     }
827
828     for(i=0; i<s->avctx->thread_count; i++){
829         if(init_duplicate_context(s->thread_context[i], s) < 0)
830            goto fail;
831         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
832         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
833     }
834
835     return 0;
836  fail:
837     MPV_common_end(s);
838     return -1;
839 }
840
841 /* free common structure for both encoder and decoder */
842 void MPV_common_end(MpegEncContext *s)
843 {
844     int i, j, k;
845
846     for(i=0; i<s->avctx->thread_count; i++){
847         free_duplicate_context(s->thread_context[i]);
848     }
849     for(i=1; i<s->avctx->thread_count; i++){
850         av_freep(&s->thread_context[i]);
851     }
852
853     av_freep(&s->parse_context.buffer);
854     s->parse_context.buffer_size=0;
855
856     av_freep(&s->mb_type);
857     av_freep(&s->p_mv_table_base);
858     av_freep(&s->b_forw_mv_table_base);
859     av_freep(&s->b_back_mv_table_base);
860     av_freep(&s->b_bidir_forw_mv_table_base);
861     av_freep(&s->b_bidir_back_mv_table_base);
862     av_freep(&s->b_direct_mv_table_base);
863     s->p_mv_table= NULL;
864     s->b_forw_mv_table= NULL;
865     s->b_back_mv_table= NULL;
866     s->b_bidir_forw_mv_table= NULL;
867     s->b_bidir_back_mv_table= NULL;
868     s->b_direct_mv_table= NULL;
869     for(i=0; i<2; i++){
870         for(j=0; j<2; j++){
871             for(k=0; k<2; k++){
872                 av_freep(&s->b_field_mv_table_base[i][j][k]);
873                 s->b_field_mv_table[i][j][k]=NULL;
874             }
875             av_freep(&s->b_field_select_table[i][j]);
876             av_freep(&s->p_field_mv_table_base[i][j]);
877             s->p_field_mv_table[i][j]=NULL;
878         }
879         av_freep(&s->p_field_select_table[i]);
880     }
881
882     av_freep(&s->dc_val_base);
883     av_freep(&s->ac_val_base);
884     av_freep(&s->coded_block_base);
885     av_freep(&s->mbintra_table);
886     av_freep(&s->cbp_table);
887     av_freep(&s->pred_dir_table);
888
889     av_freep(&s->mbskip_table);
890     av_freep(&s->prev_pict_types);
891     av_freep(&s->bitstream_buffer);
892     s->allocated_bitstream_buffer_size=0;
893
894     av_freep(&s->avctx->stats_out);
895     av_freep(&s->ac_stats);
896     av_freep(&s->error_status_table);
897     av_freep(&s->mb_index2xy);
898     av_freep(&s->lambda_table);
899     av_freep(&s->q_intra_matrix);
900     av_freep(&s->q_inter_matrix);
901     av_freep(&s->q_intra_matrix16);
902     av_freep(&s->q_inter_matrix16);
903     av_freep(&s->input_picture);
904     av_freep(&s->reordered_input_picture);
905     av_freep(&s->dct_offset);
906
907     if(s->picture){
908         for(i=0; i<MAX_PICTURE_COUNT; i++){
909             free_picture(s, &s->picture[i]);
910         }
911     }
912     av_freep(&s->picture);
913     s->context_initialized = 0;
914     s->last_picture_ptr=
915     s->next_picture_ptr=
916     s->current_picture_ptr= NULL;
917     s->linesize= s->uvlinesize= 0;
918
919     for(i=0; i<3; i++)
920         av_freep(&s->visualization_buffer[i]);
921
922     avcodec_default_free_buffers(s->avctx);
923 }
924
925 #ifdef CONFIG_ENCODERS
926
927 /* init video encoder */
928 int MPV_encode_init(AVCodecContext *avctx)
929 {
930     MpegEncContext *s = avctx->priv_data;
931     int i;
932     int chroma_h_shift, chroma_v_shift;
933
934     MPV_encode_defaults(s);
935
936     switch (avctx->codec_id) {
937     case CODEC_ID_MPEG2VIDEO:
938         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
939             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
940             return -1;
941         }
942         break;
943     case CODEC_ID_LJPEG:
944     case CODEC_ID_MJPEG:
945         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
946            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
947             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
948             return -1;
949         }
950         break;
951     default:
952         if(avctx->pix_fmt != PIX_FMT_YUV420P){
953             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
954             return -1;
955         }
956     }
957
958     switch (avctx->pix_fmt) {
959     case PIX_FMT_YUVJ422P:
960     case PIX_FMT_YUV422P:
961         s->chroma_format = CHROMA_422;
962         break;
963     case PIX_FMT_YUVJ420P:
964     case PIX_FMT_YUV420P:
965     default:
966         s->chroma_format = CHROMA_420;
967         break;
968     }
969
970     s->bit_rate = avctx->bit_rate;
971     s->width = avctx->width;
972     s->height = avctx->height;
973     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
974         av_log(avctx, AV_LOG_ERROR, "Warning: keyframe interval too large, reducing it ...\n");
975         avctx->gop_size=600;
976     }
977     s->gop_size = avctx->gop_size;
978     s->avctx = avctx;
979     s->flags= avctx->flags;
980     s->flags2= avctx->flags2;
981     s->max_b_frames= avctx->max_b_frames;
982     s->codec_id= avctx->codec->id;
983     s->luma_elim_threshold  = avctx->luma_elim_threshold;
984     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
985     s->strict_std_compliance= avctx->strict_std_compliance;
986     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
987     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
988     s->mpeg_quant= avctx->mpeg_quant;
989     s->rtp_mode= !!avctx->rtp_payload_size;
990     s->intra_dc_precision= avctx->intra_dc_precision;
991     s->user_specified_pts = AV_NOPTS_VALUE;
992
993     if (s->gop_size <= 1) {
994         s->intra_only = 1;
995         s->gop_size = 12;
996     } else {
997         s->intra_only = 0;
998     }
999
1000     s->me_method = avctx->me_method;
1001
1002     /* Fixed QSCALE */
1003     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1004
1005     s->adaptive_quant= (   s->avctx->lumi_masking
1006                         || s->avctx->dark_masking
1007                         || s->avctx->temporal_cplx_masking
1008                         || s->avctx->spatial_cplx_masking
1009                         || s->avctx->p_masking
1010                         || s->avctx->border_masking
1011                         || (s->flags&CODEC_FLAG_QP_RD))
1012                        && !s->fixed_qscale;
1013
1014     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1015     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1016     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1017     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1018
1019     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1020         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
1021         return -1;
1022     }
1023
1024     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1025         av_log(avctx, AV_LOG_INFO, "Warning: min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1026     }
1027
1028     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1029         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1030         return -1;
1031     }
1032
1033     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1034         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1035         return -1;
1036     }
1037
1038     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1039        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1040        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1041
1042         av_log(avctx, AV_LOG_INFO, "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1043     }
1044
1045     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1046        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1047         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1048         return -1;
1049     }
1050
1051     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1052         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1053         return -1;
1054     }
1055
1056     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1057         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1058         return -1;
1059     }
1060
1061     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1062         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1063         return -1;
1064     }
1065
1066     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1067         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1068         return -1;
1069     }
1070
1071     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1077        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1078         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1079         return -1;
1080     }
1081
1082     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1083         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1084         return -1;
1085     }
1086
1087     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1088         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1089         return -1;
1090     }
1091
1092     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1093         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1094         return -1;
1095     }
1096
1097     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1098         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet, set threshold to 1000000000\n");
1099         return -1;
1100     }
1101
1102     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1103         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->flags & CODEC_FLAG_LOW_DELAY){
1108         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1109             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1110             return -1;
1111         }
1112         if (s->max_b_frames != 0){
1113             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1114             return -1;
1115         }
1116     }
1117
1118     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1119        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1120        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1121         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1122         return -1;
1123     }
1124
1125     if(s->avctx->thread_count > 1)
1126         s->rtp_mode= 1;
1127
1128     if(!avctx->time_base.den || !avctx->time_base.num){
1129         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1130         return -1;
1131     }
1132
1133     i= (INT_MAX/2+128)>>8;
1134     if(avctx->me_threshold >= i){
1135         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1136         return -1;
1137     }
1138     if(avctx->mb_threshold >= i){
1139         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1140         return -1;
1141     }
1142
1143     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1144         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1145         avctx->b_frame_strategy = 0;
1146     }
1147
1148     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1149     if(i > 1){
1150         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1151         avctx->time_base.den /= i;
1152         avctx->time_base.num /= i;
1153 //        return -1;
1154     }
1155
1156     if(s->codec_id==CODEC_ID_MJPEG){
1157         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1158         s->inter_quant_bias= 0;
1159     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1160         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1161         s->inter_quant_bias= 0;
1162     }else{
1163         s->intra_quant_bias=0;
1164         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1165     }
1166
1167     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1168         s->intra_quant_bias= avctx->intra_quant_bias;
1169     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1170         s->inter_quant_bias= avctx->inter_quant_bias;
1171
1172     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1173
1174     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1175         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1176         return -1;
1177     }
1178     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1179
1180     switch(avctx->codec->id) {
1181     case CODEC_ID_MPEG1VIDEO:
1182         s->out_format = FMT_MPEG1;
1183         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1184         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1185         break;
1186     case CODEC_ID_MPEG2VIDEO:
1187         s->out_format = FMT_MPEG1;
1188         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1189         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1190         s->rtp_mode= 1;
1191         break;
1192     case CODEC_ID_LJPEG:
1193     case CODEC_ID_JPEGLS:
1194     case CODEC_ID_MJPEG:
1195         s->out_format = FMT_MJPEG;
1196         s->intra_only = 1; /* force intra only for jpeg */
1197         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1198         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1199         s->mjpeg_vsample[0] = 2;
1200         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1201         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1202         s->mjpeg_hsample[0] = 2;
1203         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1204         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1205         if (mjpeg_init(s) < 0)
1206             return -1;
1207         avctx->delay=0;
1208         s->low_delay=1;
1209         break;
1210 #ifdef CONFIG_H261_ENCODER
1211     case CODEC_ID_H261:
1212         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1213             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1214             return -1;
1215         }
1216         s->out_format = FMT_H261;
1217         avctx->delay=0;
1218         s->low_delay=1;
1219         break;
1220 #endif
1221     case CODEC_ID_H263:
1222         if (h263_get_picture_format(s->width, s->height) == 7) {
1223             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1224             return -1;
1225         }
1226         s->out_format = FMT_H263;
1227         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1228         avctx->delay=0;
1229         s->low_delay=1;
1230         break;
1231     case CODEC_ID_H263P:
1232         s->out_format = FMT_H263;
1233         s->h263_plus = 1;
1234         /* Fx */
1235         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1236         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1237         s->modified_quant= s->h263_aic;
1238         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1239         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1240         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1241         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1242         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1243
1244         /* /Fx */
1245         /* These are just to be sure */
1246         avctx->delay=0;
1247         s->low_delay=1;
1248         break;
1249     case CODEC_ID_FLV1:
1250         s->out_format = FMT_H263;
1251         s->h263_flv = 2; /* format = 1; 11-bit codes */
1252         s->unrestricted_mv = 1;
1253         s->rtp_mode=0; /* don't allow GOB */
1254         avctx->delay=0;
1255         s->low_delay=1;
1256         break;
1257     case CODEC_ID_RV10:
1258         s->out_format = FMT_H263;
1259         avctx->delay=0;
1260         s->low_delay=1;
1261         break;
1262     case CODEC_ID_RV20:
1263         s->out_format = FMT_H263;
1264         avctx->delay=0;
1265         s->low_delay=1;
1266         s->modified_quant=1;
1267         s->h263_aic=1;
1268         s->h263_plus=1;
1269         s->loop_filter=1;
1270         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1271         break;
1272     case CODEC_ID_MPEG4:
1273         s->out_format = FMT_H263;
1274         s->h263_pred = 1;
1275         s->unrestricted_mv = 1;
1276         s->low_delay= s->max_b_frames ? 0 : 1;
1277         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1278         break;
1279     case CODEC_ID_MSMPEG4V1:
1280         s->out_format = FMT_H263;
1281         s->h263_msmpeg4 = 1;
1282         s->h263_pred = 1;
1283         s->unrestricted_mv = 1;
1284         s->msmpeg4_version= 1;
1285         avctx->delay=0;
1286         s->low_delay=1;
1287         break;
1288     case CODEC_ID_MSMPEG4V2:
1289         s->out_format = FMT_H263;
1290         s->h263_msmpeg4 = 1;
1291         s->h263_pred = 1;
1292         s->unrestricted_mv = 1;
1293         s->msmpeg4_version= 2;
1294         avctx->delay=0;
1295         s->low_delay=1;
1296         break;
1297     case CODEC_ID_MSMPEG4V3:
1298         s->out_format = FMT_H263;
1299         s->h263_msmpeg4 = 1;
1300         s->h263_pred = 1;
1301         s->unrestricted_mv = 1;
1302         s->msmpeg4_version= 3;
1303         s->flipflop_rounding=1;
1304         avctx->delay=0;
1305         s->low_delay=1;
1306         break;
1307     case CODEC_ID_WMV1:
1308         s->out_format = FMT_H263;
1309         s->h263_msmpeg4 = 1;
1310         s->h263_pred = 1;
1311         s->unrestricted_mv = 1;
1312         s->msmpeg4_version= 4;
1313         s->flipflop_rounding=1;
1314         avctx->delay=0;
1315         s->low_delay=1;
1316         break;
1317     case CODEC_ID_WMV2:
1318         s->out_format = FMT_H263;
1319         s->h263_msmpeg4 = 1;
1320         s->h263_pred = 1;
1321         s->unrestricted_mv = 1;
1322         s->msmpeg4_version= 5;
1323         s->flipflop_rounding=1;
1324         avctx->delay=0;
1325         s->low_delay=1;
1326         break;
1327     default:
1328         return -1;
1329     }
1330
1331     avctx->has_b_frames= !s->low_delay;
1332
1333     s->encoding = 1;
1334
1335     /* init */
1336     if (MPV_common_init(s) < 0)
1337         return -1;
1338
1339     if(s->modified_quant)
1340         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1341     s->progressive_frame=
1342     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1343     s->quant_precision=5;
1344
1345     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1346     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1347
1348 #ifdef CONFIG_H261_ENCODER
1349     if (s->out_format == FMT_H261)
1350         ff_h261_encode_init(s);
1351 #endif
1352     if (s->out_format == FMT_H263)
1353         h263_encode_init(s);
1354     if(s->msmpeg4_version)
1355         ff_msmpeg4_encode_init(s);
1356     if (s->out_format == FMT_MPEG1)
1357         ff_mpeg1_encode_init(s);
1358
1359     /* init q matrix */
1360     for(i=0;i<64;i++) {
1361         int j= s->dsp.idct_permutation[i];
1362         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1363             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1364             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1365         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1366             s->intra_matrix[j] =
1367             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1368         }else
1369         { /* mpeg1/2 */
1370             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1371             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1372         }
1373         if(s->avctx->intra_matrix)
1374             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1375         if(s->avctx->inter_matrix)
1376             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1377     }
1378
1379     /* precompute matrix */
1380     /* for mjpeg, we do include qscale in the matrix */
1381     if (s->out_format != FMT_MJPEG) {
1382         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1383                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1384         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1385                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1386     }
1387
1388     if(ff_rate_control_init(s) < 0)
1389         return -1;
1390
1391     return 0;
1392 }
1393
1394 int MPV_encode_end(AVCodecContext *avctx)
1395 {
1396     MpegEncContext *s = avctx->priv_data;
1397
1398     ff_rate_control_uninit(s);
1399
1400     MPV_common_end(s);
1401     if (s->out_format == FMT_MJPEG)
1402         mjpeg_close(s);
1403
1404     av_freep(&avctx->extradata);
1405
1406     return 0;
1407 }
1408
1409 #endif //CONFIG_ENCODERS
1410
1411 void init_rl(RLTable *rl, int use_static)
1412 {
1413     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1414     uint8_t index_run[MAX_RUN+1];
1415     int last, run, level, start, end, i;
1416
1417     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1418     if(use_static && rl->max_level[0])
1419         return;
1420
1421     /* compute max_level[], max_run[] and index_run[] */
1422     for(last=0;last<2;last++) {
1423         if (last == 0) {
1424             start = 0;
1425             end = rl->last;
1426         } else {
1427             start = rl->last;
1428             end = rl->n;
1429         }
1430
1431         memset(max_level, 0, MAX_RUN + 1);
1432         memset(max_run, 0, MAX_LEVEL + 1);
1433         memset(index_run, rl->n, MAX_RUN + 1);
1434         for(i=start;i<end;i++) {
1435             run = rl->table_run[i];
1436             level = rl->table_level[i];
1437             if (index_run[run] == rl->n)
1438                 index_run[run] = i;
1439             if (level > max_level[run])
1440                 max_level[run] = level;
1441             if (run > max_run[level])
1442                 max_run[level] = run;
1443         }
1444         if(use_static)
1445             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1446         else
1447             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1448         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1449         if(use_static)
1450             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1451         else
1452             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1453         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1454         if(use_static)
1455             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1456         else
1457             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1458         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1459     }
1460 }
1461
1462 /* draw the edges of width 'w' of an image of size width x height */
1463 //FIXME check that this is ok for mpeg4 interlaced
1464 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1465 {
1466     uint8_t *ptr, *last_line;
1467     int i;
1468
1469     last_line = buf + (height - 1) * wrap;
1470     for(i=0;i<w;i++) {
1471         /* top and bottom */
1472         memcpy(buf - (i + 1) * wrap, buf, width);
1473         memcpy(last_line + (i + 1) * wrap, last_line, width);
1474     }
1475     /* left and right */
1476     ptr = buf;
1477     for(i=0;i<height;i++) {
1478         memset(ptr - w, ptr[0], w);
1479         memset(ptr + width, ptr[width-1], w);
1480         ptr += wrap;
1481     }
1482     /* corners */
1483     for(i=0;i<w;i++) {
1484         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1485         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1486         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1487         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1488     }
1489 }
1490
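/**
 * Finds an unused entry in s->picture[].
 * @param shared if set, only slots that are both unallocated and untyped qualify;
 *               otherwise unallocated slots that still carry a type are preferred
 * @return index of the found picture slot
 */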
1491 int ff_find_unused_picture(MpegEncContext *s, int shared){
1492     int i;
1493
1494     if(shared){
1495         for(i=0; i<MAX_PICTURE_COUNT; i++){
1496             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1497         }
1498     }else{
1499         for(i=0; i<MAX_PICTURE_COUNT; i++){
1500             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1501         }
1502         for(i=0; i<MAX_PICTURE_COUNT; i++){
1503             if(s->picture[i].data[0]==NULL) return i;
1504         }
1505     }
1506
1507     assert(0);
1508     return -1;
1509 }
1510
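/**
 * Updates the DCT noise-reduction offset tables from the accumulated
 * per-coefficient error sums, halving the accumulators once enough
 * blocks have been counted.
 */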
1511 static void update_noise_reduction(MpegEncContext *s){
1512     int intra, i;
1513
1514     for(intra=0; intra<2; intra++){
1515         if(s->dct_count[intra] > (1<<16)){
1516             for(i=0; i<64; i++){
1517                 s->dct_error_sum[intra][i] >>=1;
1518             }
1519             s->dct_count[intra] >>= 1;
1520         }
1521
1522         for(i=0; i<64; i++){
1523             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1524         }
1525     }
1526 }
1527
1528 /**
1529  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1530  */
1531 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1532 {
1533     int i;
1534     AVFrame *pic;
1535     s->mb_skipped = 0;
1536
1537     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1538
1539     /* mark&release old frames */
1540     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1541       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1542         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1543
1544         /* release forgotten pictures */
1545         /* if(mpeg124/h263) */
1546         if(!s->encoding){
1547             for(i=0; i<MAX_PICTURE_COUNT; i++){
1548                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1549                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1550                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1551                 }
1552             }
1553         }
1554       }
1555     }
1556 alloc:
1557     if(!s->encoding){
1558         /* release non-reference frames */
1559         for(i=0; i<MAX_PICTURE_COUNT; i++){
1560             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1561                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1562             }
1563         }
1564
1565         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1566             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1567         else{
1568             i= ff_find_unused_picture(s, 0);
1569             pic= (AVFrame*)&s->picture[i];
1570         }
1571
1572         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1573                         && !s->dropable ? 3 : 0;
1574
1575         pic->coded_picture_number= s->coded_picture_number++;
1576
1577         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1578             return -1;
1579
1580         s->current_picture_ptr= (Picture*)pic;
1581         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1582         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1583     }
1584
1585     s->current_picture_ptr->pict_type= s->pict_type;
1586 //    if(s->flags && CODEC_FLAG_QSCALE)
1587   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1588     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1589
1590     copy_picture(&s->current_picture, s->current_picture_ptr);
1591
1592     if (s->pict_type != B_TYPE) {
1593         s->last_picture_ptr= s->next_picture_ptr;
1594         if(!s->dropable)
1595             s->next_picture_ptr= s->current_picture_ptr;
1596     }
1597 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1598         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1599         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1600         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1601         s->pict_type, s->dropable);*/
1602
1603     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1604     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1605
1606     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1607         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1608         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1609         goto alloc;
1610     }
1611
1612     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1613
1614     if(s->picture_structure!=PICT_FRAME){
1615         int i;
1616         for(i=0; i<4; i++){
1617             if(s->picture_structure == PICT_BOTTOM_FIELD){
1618                  s->current_picture.data[i] += s->current_picture.linesize[i];
1619             }
1620             s->current_picture.linesize[i] *= 2;
1621             s->last_picture.linesize[i] *=2;
1622             s->next_picture.linesize[i] *=2;
1623         }
1624     }
1625
1626     s->hurry_up= s->avctx->hurry_up;
1627     s->error_resilience= avctx->error_resilience;
1628
1629     /* set dequantizer, we can't do it during init as it might change for mpeg4
1630        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1631     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1632         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1633         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1634     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1635         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1636         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1637     }else{
1638         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1640     }
1641
1642     if(s->dct_error_sum){
1643         assert(s->avctx->noise_reduction && s->encoding);
1644
1645         update_noise_reduction(s);
1646     }
1647
1648 #ifdef HAVE_XVMC
1649     if(s->avctx->xvmc_acceleration)
1650         return XVMC_field_start(s, avctx);
1651 #endif
1652     return 0;
1653 }
1654
1655 /* generic function for encode/decode called after a frame has been coded/decoded */
1656 void MPV_frame_end(MpegEncContext *s)
1657 {
1658     int i;
1659     /* draw edge for correct motion prediction if outside */
1660 #ifdef HAVE_XVMC
1661 //just to make sure that all data is rendered.
1662     if(s->avctx->xvmc_acceleration){
1663         XVMC_field_end(s);
1664     }else
1665 #endif
1666     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1667             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1668             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1669             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1670     }
1671     emms_c();
1672
1673     s->last_pict_type    = s->pict_type;
1674     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1675     if(s->pict_type!=B_TYPE){
1676         s->last_non_b_pict_type= s->pict_type;
1677     }
1678 #if 0
1679         /* copy back current_picture variables */
1680     for(i=0; i<MAX_PICTURE_COUNT; i++){
1681         if(s->picture[i].data[0] == s->current_picture.data[0]){
1682             s->picture[i]= s->current_picture;
1683             break;
1684         }
1685     }
1686     assert(i<MAX_PICTURE_COUNT);
1687 #endif
1688
1689     if(s->encoding){
1690         /* release non-reference frames */
1691         for(i=0; i<MAX_PICTURE_COUNT; i++){
1692             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1693                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1694             }
1695         }
1696     }
1697     // clear copies, to avoid confusion
1698 #if 0
1699     memset(&s->last_picture, 0, sizeof(Picture));
1700     memset(&s->next_picture, 0, sizeof(Picture));
1701     memset(&s->current_picture, 0, sizeof(Picture));
1702 #endif
1703     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1704 }
1705
1706 /**
1707  * draws a line from (ex, ey) -> (sx, sy).
1708  * @param w width of the image
1709  * @param h height of the image
1710  * @param stride stride/linesize of the image
1711  * @param color color of the line
1712  */
1713 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1714     int x, y, fr, f;
1715
1716     sx= av_clip(sx, 0, w-1);
1717     sy= av_clip(sy, 0, h-1);
1718     ex= av_clip(ex, 0, w-1);
1719     ey= av_clip(ey, 0, h-1);
1720
1721     buf[sy*stride + sx]+= color;
1722
1723     if(FFABS(ex - sx) > FFABS(ey - sy)){
1724         if(sx > ex){
1725             FFSWAP(int, sx, ex);
1726             FFSWAP(int, sy, ey);
1727         }
1728         buf+= sx + sy*stride;
1729         ex-= sx;
1730         f= ((ey-sy)<<16)/ex;
1731         for(x= 0; x <= ex; x++){
1732             y = (x*f)>>16;
1733             fr= (x*f)&0xFFFF;
1734             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1735             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1736         }
1737     }else{
1738         if(sy > ey){
1739             FFSWAP(int, sx, ex);
1740             FFSWAP(int, sy, ey);
1741         }
1742         buf+= sx + sy*stride;
1743         ey-= sy;
1744         if(ey) f= ((ex-sx)<<16)/ey;
1745         else   f= 0;
1746         for(y= 0; y <= ey; y++){
1747             x = (y*f)>>16;
1748             fr= (y*f)&0xFFFF;
1749             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1750             buf[y*stride + x+1]+= (color*         fr )>>16;
1751         }
1752     }
1753 }
1754
1755 /**
1756  * draws an arrow from (ex, ey) -> (sx, sy).
1757  * @param w width of the image
1758  * @param h height of the image
1759  * @param stride stride/linesize of the image
1760  * @param color color of the arrow
1761  */
1762 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1763     int dx,dy;
1764
1765     sx= av_clip(sx, -100, w+100);
1766     sy= av_clip(sy, -100, h+100);
1767     ex= av_clip(ex, -100, w+100);
1768     ey= av_clip(ey, -100, h+100);
1769
1770     dx= ex - sx;
1771     dy= ey - sy;
1772
1773     if(dx*dx + dy*dy > 3*3){
1774         int rx=  dx + dy;
1775         int ry= -dx + dy;
1776         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1777
1778         //FIXME subpixel accuracy
1779         rx= ROUNDED_DIV(rx*3<<4, length);
1780         ry= ROUNDED_DIV(ry*3<<4, length);
1781
1782         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1783         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1784     }
1785     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1786 }
1787
1788 /**
1789  * prints debugging info for the given picture.
1790  */
1791 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1792
1793     if(!pict || !pict->mb_type) return;
1794
1795     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1796         int x,y;
1797
1798         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1799         switch (pict->pict_type) {
1800             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1801             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1802             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1803             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1804             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1805             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1806         }
1807         for(y=0; y<s->mb_height; y++){
1808             for(x=0; x<s->mb_width; x++){
1809                 if(s->avctx->debug&FF_DEBUG_SKIP){
1810                     int count= s->mbskip_table[x + y*s->mb_stride];
1811                     if(count>9) count=9;
1812                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1813                 }
1814                 if(s->avctx->debug&FF_DEBUG_QP){
1815                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1816                 }
1817                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1818                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1819                     //Type & MV direction
1820                     if(IS_PCM(mb_type))
1821                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1822                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1823                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1824                     else if(IS_INTRA4x4(mb_type))
1825                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1826                     else if(IS_INTRA16x16(mb_type))
1827                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1828                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1830                     else if(IS_DIRECT(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1832                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1834                     else if(IS_GMC(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1836                     else if(IS_SKIP(mb_type))
1837                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1838                     else if(!USES_LIST(mb_type, 1))
1839                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1840                     else if(!USES_LIST(mb_type, 0))
1841                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1842                     else{
1843                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1844                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1845                     }
1846
1847                     //segmentation
1848                     if(IS_8X8(mb_type))
1849                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1850                     else if(IS_16X8(mb_type))
1851                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1852                     else if(IS_8X16(mb_type))
1853                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1854                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1855                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1856                     else
1857                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1858
1859
1860                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1861                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1862                     else
1863                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1864                 }
1865 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1866             }
1867             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1868         }
1869     }
1870
1871     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1872         const int shift= 1 + s->quarter_sample;
1873         int mb_y;
1874         uint8_t *ptr;
1875         int i;
1876         int h_chroma_shift, v_chroma_shift;
1877         const int width = s->avctx->width;
1878         const int height= s->avctx->height;
1879         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1880         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1881         s->low_delay=0; //needed to see the vectors without trashing the buffers
1882
1883         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1884         for(i=0; i<3; i++){
1885             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1886             pict->data[i]= s->visualization_buffer[i];
1887         }
1888         pict->type= FF_BUFFER_TYPE_COPY;
1889         ptr= pict->data[0];
1890
1891         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1892             int mb_x;
1893             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1894                 const int mb_index= mb_x + mb_y*s->mb_stride;
1895                 if((s->avctx->debug_mv) && pict->motion_val){
1896                   int type;
1897                   for(type=0; type<3; type++){
1898                     int direction = 0;
1899                     switch (type) {
1900                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1901                                 continue;
1902                               direction = 0;
1903                               break;
1904                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1905                                 continue;
1906                               direction = 0;
1907                               break;
1908                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1909                                 continue;
1910                               direction = 1;
1911                               break;
1912                     }
1913                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1914                         continue;
1915
1916                     if(IS_8X8(pict->mb_type[mb_index])){
1917                       int i;
1918                       for(i=0; i<4; i++){
1919                         int sx= mb_x*16 + 4 + 8*(i&1);
1920                         int sy= mb_y*16 + 4 + 8*(i>>1);
1921                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1922                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1923                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1924                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1925                       }
1926                     }else if(IS_16X8(pict->mb_type[mb_index])){
1927                       int i;
1928                       for(i=0; i<2; i++){
1929                         int sx=mb_x*16 + 8;
1930                         int sy=mb_y*16 + 4 + 8*i;
1931                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1932                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1933                         int my=(pict->motion_val[direction][xy][1]>>shift);
1934
1935                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1936                             my*=2;
1937
1938                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1939                       }
1940                     }else if(IS_8X16(pict->mb_type[mb_index])){
1941                       int i;
1942                       for(i=0; i<2; i++){
1943                         int sx=mb_x*16 + 4 + 8*i;
1944                         int sy=mb_y*16 + 8;
1945                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1946                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1947                         int my=(pict->motion_val[direction][xy][1]>>shift);
1948
1949                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1950                             my*=2;
1951
1952                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1953                       }
1954                     }else{
1955                       int sx= mb_x*16 + 8;
1956                       int sy= mb_y*16 + 8;
1957                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1958                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1959                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1960                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1961                     }
1962                   }
1963                 }
1964                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1965                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1966                     int y;
1967                     for(y=0; y<8; y++){
1968                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1969                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1970                     }
1971                 }
1972                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1973                     int mb_type= pict->mb_type[mb_index];
1974                     uint64_t u,v;
1975                     int y;
1976 #define COLOR(theta, r)\
1977 u= (int)(128 + r*cos(theta*3.141592/180));\
1978 v= (int)(128 + r*sin(theta*3.141592/180));
1979
1980
1981                     u=v=128;
1982                     if(IS_PCM(mb_type)){
1983                         COLOR(120,48)
1984                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1985                         COLOR(30,48)
1986                     }else if(IS_INTRA4x4(mb_type)){
1987                         COLOR(90,48)
1988                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1989 //                        COLOR(120,48)
1990                     }else if(IS_DIRECT(mb_type)){
1991                         COLOR(150,48)
1992                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1993                         COLOR(170,48)
1994                     }else if(IS_GMC(mb_type)){
1995                         COLOR(190,48)
1996                     }else if(IS_SKIP(mb_type)){
1997 //                        COLOR(180,48)
1998                     }else if(!USES_LIST(mb_type, 1)){
1999                         COLOR(240,48)
2000                     }else if(!USES_LIST(mb_type, 0)){
2001                         COLOR(0,48)
2002                     }else{
2003                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2004                         COLOR(300,48)
2005                     }
2006
2007                     u*= 0x0101010101010101ULL;
2008                     v*= 0x0101010101010101ULL;
2009                     for(y=0; y<8; y++){
2010                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2011                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2012                     }
2013
2014                     //segmentation
2015                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2016                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2017                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2018                     }
2019                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2020                         for(y=0; y<16; y++)
2021                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2022                     }
2023                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2024                         int dm= 1 << (mv_sample_log2-2);
2025                         for(i=0; i<4; i++){
2026                             int sx= mb_x*16 + 8*(i&1);
2027                             int sy= mb_y*16 + 8*(i>>1);
2028                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2029                             //FIXME bidir
2030                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2031                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2032                                 for(y=0; y<8; y++)
2033                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2034                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2035                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2036                         }
2037                     }
2038
2039                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2040                         // hmm
2041                     }
2042                 }
2043                 s->mbskip_table[mb_index]=0;
2044             }
2045         }
2046     }
2047 }
2048
2049 #ifdef CONFIG_ENCODERS
2050
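/**
 * Returns the sum of absolute differences between the pixels of a 16x16 block
 * and a constant reference value.
 */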
2051 static int get_sae(uint8_t *src, int ref, int stride){
2052     int x,y;
2053     int acc=0;
2054
2055     for(y=0; y<16; y++){
2056         for(x=0; x<16; x++){
2057             acc+= FFABS(src[x+y*stride] - ref);
2058         }
2059     }
2060
2061     return acc;
2062 }
2063
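/**
 * Counts the 16x16 blocks for which the SAE against the block mean (plus a bias)
 * is smaller than the SAD against the reference frame, as a rough estimate of
 * how many macroblocks would end up intra coded.
 */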
2064 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2065     int x, y, w, h;
2066     int acc=0;
2067
2068     w= s->width &~15;
2069     h= s->height&~15;
2070
2071     for(y=0; y<h; y+=16){
2072         for(x=0; x<w; x+=16){
2073             int offset= x + y*stride;
2074             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2075             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2076             int sae = get_sae(src + offset, mean, stride);
2077
2078             acc+= sae + 500 < sad;
2079         }
2080     }
2081     return acc;
2082 }
2083
2084
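/**
 * Wraps or copies the user-supplied input frame into an internal Picture,
 * checks or guesses its pts and appends it to the input_picture[] FIFO.
 * @return 0 on success, -1 on non-monotone timestamps
 */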
2085 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2086     AVFrame *pic=NULL;
2087     int64_t pts;
2088     int i;
2089     const int encoding_delay= s->max_b_frames;
2090     int direct=1;
2091
2092     if(pic_arg){
2093         pts= pic_arg->pts;
2094         pic_arg->display_picture_number= s->input_picture_number++;
2095
2096         if(pts != AV_NOPTS_VALUE){
2097             if(s->user_specified_pts != AV_NOPTS_VALUE){
2098                 int64_t time= pts;
2099                 int64_t last= s->user_specified_pts;
2100
2101                 if(time <= last){
2102                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2103                     return -1;
2104                 }
2105             }
2106             s->user_specified_pts= pts;
2107         }else{
2108             if(s->user_specified_pts != AV_NOPTS_VALUE){
2109                 s->user_specified_pts=
2110                 pts= s->user_specified_pts + 1;
2111                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2112             }else{
2113                 pts= pic_arg->display_picture_number;
2114             }
2115         }
2116     }
2117
2118   if(pic_arg){
2119     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2120     if(pic_arg->linesize[0] != s->linesize) direct=0;
2121     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2122     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2123
2124 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2125
2126     if(direct){
2127         i= ff_find_unused_picture(s, 1);
2128
2129         pic= (AVFrame*)&s->picture[i];
2130         pic->reference= 3;
2131
2132         for(i=0; i<4; i++){
2133             pic->data[i]= pic_arg->data[i];
2134             pic->linesize[i]= pic_arg->linesize[i];
2135         }
2136         alloc_picture(s, (Picture*)pic, 1);
2137     }else{
2138         i= ff_find_unused_picture(s, 0);
2139
2140         pic= (AVFrame*)&s->picture[i];
2141         pic->reference= 3;
2142
2143         alloc_picture(s, (Picture*)pic, 0);
2144
2145         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2146            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2147            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2148        // empty
2149         }else{
2150             int h_chroma_shift, v_chroma_shift;
2151             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2152
2153             for(i=0; i<3; i++){
2154                 int src_stride= pic_arg->linesize[i];
2155                 int dst_stride= i ? s->uvlinesize : s->linesize;
2156                 int h_shift= i ? h_chroma_shift : 0;
2157                 int v_shift= i ? v_chroma_shift : 0;
2158                 int w= s->width >>h_shift;
2159                 int h= s->height>>v_shift;
2160                 uint8_t *src= pic_arg->data[i];
2161                 uint8_t *dst= pic->data[i];
2162
2163                 if(!s->avctx->rc_buffer_size)
2164                     dst +=INPLACE_OFFSET;
2165
2166                 if(src_stride==dst_stride)
2167                     memcpy(dst, src, src_stride*h);
2168                 else{
2169                     while(h--){
2170                         memcpy(dst, src, w);
2171                         dst += dst_stride;
2172                         src += src_stride;
2173                     }
2174                 }
2175             }
2176         }
2177     }
2178     copy_picture_attributes(s, pic, pic_arg);
2179     pic->pts= pts; //we set this here to avoid modifying pic_arg
2180   }
2181
2182     /* shift buffer entries */
2183     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2184         s->input_picture[i-1]= s->input_picture[i];
2185
2186     s->input_picture[encoding_delay]= (Picture*)pic;
2187
2188     return 0;
2189 }
2190
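/**
 * Compares a candidate input picture against a reference picture with the
 * frame_skip_cmp metric over 8x8 blocks and decides whether encoding it can be skipped.
 * @return 1 if the frame should be skipped, 0 otherwise
 */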
2191 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2192     int x, y, plane;
2193     int score=0;
2194     int64_t score64=0;
2195
2196     for(plane=0; plane<3; plane++){
2197         const int stride= p->linesize[plane];
2198         const int bw= plane ? 1 : 2;
2199         for(y=0; y<s->mb_height*bw; y++){
2200             for(x=0; x<s->mb_width*bw; x++){
2201                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2202                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2203
2204                 switch(s->avctx->frame_skip_exp){
2205                     case 0: score= FFMAX(score, v); break;
2206                     case 1: score+= FFABS(v);break;
2207                     case 2: score+= v*v;break;
2208                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2209                     case 4: score64+= v*v*(int64_t)(v*v);break;
2210                 }
2211             }
2212         }
2213     }
2214
2215     if(score) score64= score;
2216
2217     if(score64 < s->avctx->frame_skip_threshold)
2218         return 1;
2219     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2220         return 1;
2221     return 0;
2222 }
2223
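/**
 * Encodes the buffered input pictures at reduced resolution with every possible
 * number of consecutive B-frames and returns the count with the lowest
 * rate-distortion cost (used for b_frame_strategy == 2).
 */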
2224 static int estimate_best_b_count(MpegEncContext *s){
2225     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2226     AVCodecContext *c= avcodec_alloc_context();
2227     AVFrame input[FF_MAX_B_FRAMES+2];
2228     const int scale= s->avctx->brd_scale;
2229     int i, j, out_size, p_lambda, b_lambda, lambda2;
2230     int outbuf_size= s->width * s->height; //FIXME
2231     uint8_t *outbuf= av_malloc(outbuf_size);
2232     int64_t best_rd= INT64_MAX;
2233     int best_b_count= -1;
2234
2235     assert(scale>=0 && scale <=3);
2236
2237 //    emms_c();
2238     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2239     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2240     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2241     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2242
2243     c->width = s->width >> scale;
2244     c->height= s->height>> scale;
2245     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2246     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2247     c->mb_decision= s->avctx->mb_decision;
2248     c->me_cmp= s->avctx->me_cmp;
2249     c->mb_cmp= s->avctx->mb_cmp;
2250     c->me_sub_cmp= s->avctx->me_sub_cmp;
2251     c->pix_fmt = PIX_FMT_YUV420P;
2252     c->time_base= s->avctx->time_base;
2253     c->max_b_frames= s->max_b_frames;
2254
2255     if (avcodec_open(c, codec) < 0)
2256         return -1;
2257
2258     for(i=0; i<s->max_b_frames+2; i++){
2259         int ysize= c->width*c->height;
2260         int csize= (c->width/2)*(c->height/2);
2261         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2262
2263         avcodec_get_frame_defaults(&input[i]);
2264         input[i].data[0]= av_malloc(ysize + 2*csize);
2265         input[i].data[1]= input[i].data[0] + ysize;
2266         input[i].data[2]= input[i].data[1] + csize;
2267         input[i].linesize[0]= c->width;
2268         input[i].linesize[1]=
2269         input[i].linesize[2]= c->width/2;
2270
2271         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2272             pre_input= *pre_input_ptr;
2273
2274             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2275                 pre_input.data[0]+=INPLACE_OFFSET;
2276                 pre_input.data[1]+=INPLACE_OFFSET;
2277                 pre_input.data[2]+=INPLACE_OFFSET;
2278             }
2279
2280             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2281             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2282             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2283         }
2284     }
2285
2286     for(j=0; j<s->max_b_frames+1; j++){
2287         int64_t rd=0;
2288
2289         if(!s->input_picture[j])
2290             break;
2291
2292         c->error[0]= c->error[1]= c->error[2]= 0;
2293
2294         input[0].pict_type= I_TYPE;
2295         input[0].quality= 1 * FF_QP2LAMBDA;
2296         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2297 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2298
2299         for(i=0; i<s->max_b_frames+1; i++){
2300             int is_p= i % (j+1) == j || i==s->max_b_frames;
2301
2302             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2303             input[i+1].quality= is_p ? p_lambda : b_lambda;
2304             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2305             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2306         }
2307
2308         /* get the delayed frames */
2309         while(out_size){
2310             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2311             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2312         }
2313
2314         rd += c->error[0] + c->error[1] + c->error[2];
2315
2316         if(rd < best_rd){
2317             best_rd= rd;
2318             best_b_count= j;
2319         }
2320     }
2321
2322     av_freep(&outbuf);
2323     avcodec_close(c);
2324     av_freep(&c);
2325
2326     for(i=0; i<s->max_b_frames+2; i++){
2327         av_freep(&input[i].data[0]);
2328     }
2329
2330     return best_b_count;
2331 }
2332
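/**
 * Chooses the picture type of the next frame to be coded and reorders the
 * buffered input pictures accordingly (the I/P frame first, followed by the
 * B-frames that precede it in display order), filling s->new_picture.
 */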
2333 static void select_input_picture(MpegEncContext *s){
2334     int i;
2335
2336     for(i=1; i<MAX_PICTURE_COUNT; i++)
2337         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2338     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2339
2340     /* set next picture type & ordering */
2341     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2342         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2343             s->reordered_input_picture[0]= s->input_picture[0];
2344             s->reordered_input_picture[0]->pict_type= I_TYPE;
2345             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2346         }else{
2347             int b_frames;
2348
2349             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2350                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2351                 //FIXME check that the gop check above is +-1 correct
2352 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2353
2354                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2355                         for(i=0; i<4; i++)
2356                             s->input_picture[0]->data[i]= NULL;
2357                         s->input_picture[0]->type= 0;
2358                     }else{
2359                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2360                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2361
2362                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2363                     }
2364
2365                     emms_c();
2366                     ff_vbv_update(s, 0);
2367
2368                     goto no_output_pic;
2369                 }
2370             }
2371
2372             if(s->flags&CODEC_FLAG_PASS2){
2373                 for(i=0; i<s->max_b_frames+1; i++){
2374                     int pict_num= s->input_picture[0]->display_picture_number + i;
2375
2376                     if(pict_num >= s->rc_context.num_entries)
2377                         break;
2378                     if(!s->input_picture[i]){
2379                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2380                         break;
2381                     }
2382
2383                     s->input_picture[i]->pict_type=
2384                         s->rc_context.entry[pict_num].new_pict_type;
2385                 }
2386             }
2387
2388             if(s->avctx->b_frame_strategy==0){
2389                 b_frames= s->max_b_frames;
2390                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2391             }else if(s->avctx->b_frame_strategy==1){
2392                 for(i=1; i<s->max_b_frames+1; i++){
2393                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2394                         s->input_picture[i]->b_frame_score=
2395                             get_intra_count(s, s->input_picture[i  ]->data[0],
2396                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2397                     }
2398                 }
2399                 for(i=0; i<s->max_b_frames+1; i++){
2400                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2401                 }
2402
2403                 b_frames= FFMAX(0, i-1);
2404
2405                 /* reset scores */
2406                 for(i=0; i<b_frames+1; i++){
2407                     s->input_picture[i]->b_frame_score=0;
2408                 }
2409             }else if(s->avctx->b_frame_strategy==2){
2410                 b_frames= estimate_best_b_count(s);
2411             }else{
2412                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2413                 b_frames=0;
2414             }
2415
2416             emms_c();
2417 //static int b_count=0;
2418 //b_count+= b_frames;
2419 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2420
2421             for(i= b_frames - 1; i>=0; i--){
2422                 int type= s->input_picture[i]->pict_type;
2423                 if(type && type != B_TYPE)
2424                     b_frames= i;
2425             }
2426             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2427                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2428             }
2429
2430             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2431               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2432                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2433               }else{
2434                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2435                     b_frames=0;
2436                 s->input_picture[b_frames]->pict_type= I_TYPE;
2437               }
2438             }
2439
2440             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2441                && b_frames
2442                && s->input_picture[b_frames]->pict_type== I_TYPE)
2443                 b_frames--;
2444
2445             s->reordered_input_picture[0]= s->input_picture[b_frames];
2446             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2447                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2448             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2449             for(i=0; i<b_frames; i++){
2450                 s->reordered_input_picture[i+1]= s->input_picture[i];
2451                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2452                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2453             }
2454         }
2455     }
2456 no_output_pic:
2457     if(s->reordered_input_picture[0]){
2458         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2459
2460         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2461
2462         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2463             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2464
2465             int i= ff_find_unused_picture(s, 0);
2466             Picture *pic= &s->picture[i];
2467
2468             pic->reference              = s->reordered_input_picture[0]->reference;
2469             alloc_picture(s, pic, 0);
2470
2471             /* mark us unused / free shared pic */
2472             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2473                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2474             for(i=0; i<4; i++)
2475                 s->reordered_input_picture[0]->data[i]= NULL;
2476             s->reordered_input_picture[0]->type= 0;
2477
2478             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2479
2480             s->current_picture_ptr= pic;
2481         }else{
2482             // input is not a shared pix -> reuse buffer for current_pix
2483
2484             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2485                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2486
2487             s->current_picture_ptr= s->reordered_input_picture[0];
2488             for(i=0; i<4; i++){
2489                 s->new_picture.data[i]+= INPLACE_OFFSET;
2490             }
2491         }
2492         copy_picture(&s->current_picture, s->current_picture_ptr);
2493
2494         s->picture_number= s->new_picture.display_picture_number;
2495 //printf("dpn:%d\n", s->picture_number);
2496     }else{
2497        memset(&s->new_picture, 0, sizeof(Picture));
2498     }
2499 }
2500
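/**
 * Encodes one video frame: buffers the input picture, picks the frame type,
 * runs the encoder and handles VBV retries, stuffing and pass-1 statistics.
 * @return number of bytes written to buf, or -1 on error
 */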
2501 int MPV_encode_picture(AVCodecContext *avctx,
2502                        unsigned char *buf, int buf_size, void *data)
2503 {
2504     MpegEncContext *s = avctx->priv_data;
2505     AVFrame *pic_arg = data;
2506     int i, stuffing_count;
2507
2508     for(i=0; i<avctx->thread_count; i++){
2509         int start_y= s->thread_context[i]->start_mb_y;
2510         int   end_y= s->thread_context[i]->  end_mb_y;
2511         int h= s->mb_height;
2512         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2513         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2514
2515         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2516     }
2517
2518     s->picture_in_gop_number++;
2519
2520     if(load_input_picture(s, pic_arg) < 0)
2521         return -1;
2522
2523     select_input_picture(s);
2524
2525     /* output? */
2526     if(s->new_picture.data[0]){
2527         s->pict_type= s->new_picture.pict_type;
2528 //emms_c();
2529 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2530         MPV_frame_start(s, avctx);
2531 vbv_retry:
2532         if (encode_picture(s, s->picture_number) < 0)
2533             return -1;
2534
2535         avctx->real_pict_num  = s->picture_number;
2536         avctx->header_bits = s->header_bits;
2537         avctx->mv_bits     = s->mv_bits;
2538         avctx->misc_bits   = s->misc_bits;
2539         avctx->i_tex_bits  = s->i_tex_bits;
2540         avctx->p_tex_bits  = s->p_tex_bits;
2541         avctx->i_count     = s->i_count;
2542         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2543         avctx->skip_count  = s->skip_count;
2544
2545         MPV_frame_end(s);
2546
2547         if (s->out_format == FMT_MJPEG)
2548             mjpeg_picture_trailer(s);
2549
2550         if(avctx->rc_buffer_size){
2551             RateControlContext *rcc= &s->rc_context;
2552             int max_size= rcc->buffer_index/3;
2553
2554             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2555                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2556                 if(s->adaptive_quant){
2557                     int i;
2558                     for(i=0; i<s->mb_height*s->mb_stride; i++)
2559                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
2560                 }
2561                 s->mb_skipped = 0;        //done in MPV_frame_start()
2562                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2563                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2564                         s->no_rounding ^= 1;
2565                 }
2566                 if(s->pict_type!=B_TYPE){
2567                     s->time_base= s->last_time_base;
2568                     s->last_non_b_time= s->time - s->pp_time;
2569                 }
2570 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2571                 for(i=0; i<avctx->thread_count; i++){
2572                     PutBitContext *pb= &s->thread_context[i]->pb;
2573                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2574                 }
2575                 goto vbv_retry;
2576             }
2577
2578             assert(s->avctx->rc_max_rate);
2579         }
2580
2581         if(s->flags&CODEC_FLAG_PASS1)
2582             ff_write_pass1_stats(s);
2583
2584         for(i=0; i<4; i++){
2585             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2586             avctx->error[i] += s->current_picture_ptr->error[i];
2587         }
2588
2589         if(s->flags&CODEC_FLAG_PASS1)
2590             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2591         flush_put_bits(&s->pb);
2592         s->frame_bits  = put_bits_count(&s->pb);
2593
2594         stuffing_count= ff_vbv_update(s, s->frame_bits);
2595         if(stuffing_count){
2596             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2597                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2598                 return -1;
2599             }
2600
2601             switch(s->codec_id){
2602             case CODEC_ID_MPEG1VIDEO:
2603             case CODEC_ID_MPEG2VIDEO:
2604                 while(stuffing_count--){
2605                     put_bits(&s->pb, 8, 0);
2606                 }
2607             break;
2608             case CODEC_ID_MPEG4:
2609                 put_bits(&s->pb, 16, 0);
2610                 put_bits(&s->pb, 16, 0x1C3);
2611                 stuffing_count -= 4;
2612                 while(stuffing_count--){
2613                     put_bits(&s->pb, 8, 0xFF);
2614                 }
2615             break;
2616             default:
2617                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2618             }
2619             flush_put_bits(&s->pb);
2620             s->frame_bits  = put_bits_count(&s->pb);
2621         }
2622
2623         /* update mpeg1/2 vbv_delay for CBR */
2624         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2625            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2626             int vbv_delay;
2627
2628             assert(s->repeat_first_field==0);
2629
2630             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2631             assert(vbv_delay < 0xFFFF);
2632
2633             s->vbv_delay_ptr[0] &= 0xF8;
2634             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2635             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2636             s->vbv_delay_ptr[2] &= 0x07;
2637             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2638         }
2639         s->total_bits += s->frame_bits;
2640         avctx->frame_bits  = s->frame_bits;
2641     }else{
2642         assert((pbBufPtr(&s->pb) == s->pb.buf));
2643         s->frame_bits=0;
2644     }
2645     assert((s->frame_bits&7)==0);
2646
2647     return s->frame_bits/8;
2648 }
2649
2650 #endif //CONFIG_ENCODERS
2651
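/**
 * Global motion compensation for the purely translational case: applies
 * s->sprite_offset to luma and chroma, with edge emulation where the source
 * area falls outside the picture.
 */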
2652 static inline void gmc1_motion(MpegEncContext *s,
2653                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2654                                uint8_t **ref_picture)
2655 {
2656     uint8_t *ptr;
2657     int offset, src_x, src_y, linesize, uvlinesize;
2658     int motion_x, motion_y;
2659     int emu=0;
2660
2661     motion_x= s->sprite_offset[0][0];
2662     motion_y= s->sprite_offset[0][1];
2663     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2664     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2665     motion_x<<=(3-s->sprite_warping_accuracy);
2666     motion_y<<=(3-s->sprite_warping_accuracy);
2667     src_x = av_clip(src_x, -16, s->width);
2668     if (src_x == s->width)
2669         motion_x =0;
2670     src_y = av_clip(src_y, -16, s->height);
2671     if (src_y == s->height)
2672         motion_y =0;
2673
2674     linesize = s->linesize;
2675     uvlinesize = s->uvlinesize;
2676
2677     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2678
2679     if(s->flags&CODEC_FLAG_EMU_EDGE){
2680         if(   (unsigned)src_x >= s->h_edge_pos - 17
2681            || (unsigned)src_y >= s->v_edge_pos - 17){
2682             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2683             ptr= s->edge_emu_buffer;
2684         }
2685     }
2686
2687     if((motion_x|motion_y)&7){
2688         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2689         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2690     }else{
2691         int dxy;
2692
2693         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2694         if (s->no_rounding){
2695             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2696         }else{
2697             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2698         }
2699     }
2700
2701     if(s->flags&CODEC_FLAG_GRAY) return;
2702
2703     motion_x= s->sprite_offset[1][0];
2704     motion_y= s->sprite_offset[1][1];
2705     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2706     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2707     motion_x<<=(3-s->sprite_warping_accuracy);
2708     motion_y<<=(3-s->sprite_warping_accuracy);
2709     src_x = av_clip(src_x, -8, s->width>>1);
2710     if (src_x == s->width>>1)
2711         motion_x =0;
2712     src_y = av_clip(src_y, -8, s->height>>1);
2713     if (src_y == s->height>>1)
2714         motion_y =0;
2715
2716     offset = (src_y * uvlinesize) + src_x;
2717     ptr = ref_picture[1] + offset;
2718     if(s->flags&CODEC_FLAG_EMU_EDGE){
2719         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2720            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2721             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2722             ptr= s->edge_emu_buffer;
2723             emu=1;
2724         }
2725     }
2726     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2727
2728     ptr = ref_picture[2] + offset;
2729     if(emu){
2730         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2731         ptr= s->edge_emu_buffer;
2732     }
2733     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2734
2735     return;
2736 }
2737
2738 static inline void gmc_motion(MpegEncContext *s,
2739                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2740                                uint8_t **ref_picture)
2741 {
2742     uint8_t *ptr;
2743     int linesize, uvlinesize;
2744     const int a= s->sprite_warping_accuracy;
2745     int ox, oy;
2746
2747     linesize = s->linesize;
2748     uvlinesize = s->uvlinesize;
2749
2750     ptr = ref_picture[0];
2751
2752     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2753     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2754
2755     s->dsp.gmc(dest_y, ptr, linesize, 16,
2756            ox,
2757            oy,
2758            s->sprite_delta[0][0], s->sprite_delta[0][1],
2759            s->sprite_delta[1][0], s->sprite_delta[1][1],
2760            a+1, (1<<(2*a+1)) - s->no_rounding,
2761            s->h_edge_pos, s->v_edge_pos);
2762     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2763            ox + s->sprite_delta[0][0]*8,
2764            oy + s->sprite_delta[1][0]*8,
2765            s->sprite_delta[0][0], s->sprite_delta[0][1],
2766            s->sprite_delta[1][0], s->sprite_delta[1][1],
2767            a+1, (1<<(2*a+1)) - s->no_rounding,
2768            s->h_edge_pos, s->v_edge_pos);
2769
2770     if(s->flags&CODEC_FLAG_GRAY) return;
2771
2772     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2773     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2774
2775     ptr = ref_picture[1];
2776     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2777            ox,
2778            oy,
2779            s->sprite_delta[0][0], s->sprite_delta[0][1],
2780            s->sprite_delta[1][0], s->sprite_delta[1][1],
2781            a+1, (1<<(2*a+1)) - s->no_rounding,
2782            s->h_edge_pos>>1, s->v_edge_pos>>1);
2783
2784     ptr = ref_picture[2];
2785     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2786            ox,
2787            oy,
2788            s->sprite_delta[0][0], s->sprite_delta[0][1],
2789            s->sprite_delta[1][0], s->sprite_delta[1][1],
2790            a+1, (1<<(2*a+1)) - s->no_rounding,
2791            s->h_edge_pos>>1, s->v_edge_pos>>1);
2792 }
2793
2794 /**
2795  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2796  * @param buf destination buffer
2797  * @param src source buffer
2798  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2799  * @param block_w width of block
2800  * @param block_h height of block
2801  * @param src_x x coordinate of the top left sample of the block in the source buffer
2802  * @param src_y y coordinate of the top left sample of the block in the source buffer
2803  * @param w width of the source buffer
2804  * @param h height of the source buffer
2805  */
2806 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2807                                     int src_x, int src_y, int w, int h){
2808     int x, y;
2809     int start_y, start_x, end_y, end_x;
2810
2811     if(src_y>= h){
2812         src+= (h-1-src_y)*linesize;
2813         src_y=h-1;
2814     }else if(src_y<=-block_h){
2815         src+= (1-block_h-src_y)*linesize;
2816         src_y=1-block_h;
2817     }
2818     if(src_x>= w){
2819         src+= (w-1-src_x);
2820         src_x=w-1;
2821     }else if(src_x<=-block_w){
2822         src+= (1-block_w-src_x);
2823         src_x=1-block_w;
2824     }
2825
2826     start_y= FFMAX(0, -src_y);
2827     start_x= FFMAX(0, -src_x);
2828     end_y= FFMIN(block_h, h-src_y);
2829     end_x= FFMIN(block_w, w-src_x);
2830
2831     // copy existing part
2832     for(y=start_y; y<end_y; y++){
2833         for(x=start_x; x<end_x; x++){
2834             buf[x + y*linesize]= src[x + y*linesize];
2835         }
2836     }
2837
2838     //top
2839     for(y=0; y<start_y; y++){
2840         for(x=start_x; x<end_x; x++){
2841             buf[x + y*linesize]= buf[x + start_y*linesize];
2842         }
2843     }
2844
2845     //bottom
2846     for(y=end_y; y<block_h; y++){
2847         for(x=start_x; x<end_x; x++){
2848             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2849         }
2850     }
2851
2852     for(y=0; y<block_h; y++){
2853        //left
2854         for(x=0; x<start_x; x++){
2855             buf[x + y*linesize]= buf[start_x + y*linesize];
2856         }
2857
2858        //right
2859         for(x=end_x; x<block_w; x++){
2860             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2861         }
2862     }
2863 }
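
/* A minimal usage sketch (disabled): this is how the MC routines in this file
 * typically call ff_emulated_edge_mc() when a block's source area hangs over
 * the picture border. The block size and coordinates below are example values
 * only. */
#if 0
{
    int src_x = -3, src_y = -2;   /* top-left of a 16x16 half-pel block, partly outside */
    uint8_t *src = ref_picture[0] + src_y * s->linesize + src_x;

    ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize,
                        17, 17,   /* 16+1 in each direction for the half-pel taps */
                        src_x, src_y, s->h_edge_pos, s->v_edge_pos);
    src = s->edge_emu_buffer;     /* MC then reads from the padded copy */
}
#endif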
2864
2865 static inline int hpel_motion(MpegEncContext *s,
2866                                   uint8_t *dest, uint8_t *src,
2867                                   int field_based, int field_select,
2868                                   int src_x, int src_y,
2869                                   int width, int height, int stride,
2870                                   int h_edge_pos, int v_edge_pos,
2871                                   int w, int h, op_pixels_func *pix_op,
2872                                   int motion_x, int motion_y)
2873 {
2874     int dxy;
2875     int emu=0;
2876
2877     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2878     src_x += motion_x >> 1;
2879     src_y += motion_y >> 1;
2880
2881     /* WARNING: do not forget half pels */
2882     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2883     if (src_x == width)
2884         dxy &= ~1;
2885     src_y = av_clip(src_y, -16, height);
2886     if (src_y == height)
2887         dxy &= ~2;
2888     src += src_y * stride + src_x;
2889
2890     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2891         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2892            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2893             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2894                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2895             src= s->edge_emu_buffer;
2896             emu=1;
2897         }
2898     }
2899     if(field_select)
2900         src += s->linesize;
2901     pix_op[dxy](dest, src, stride, h);
2902     return emu;
2903 }
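
/* In hpel_motion() the two LSBs of the motion vector select one of the four
 * half-pel variants in pix_op[], and the remaining bits give the integer
 * displacement. A small worked example (values chosen arbitrarily):
 *
 *   motion_x = 7, motion_y = -3
 *   dxy    = ((-3 & 1) << 1) | (7 & 1) = 3   -> half-pel in both x and y
 *   src_x += 7 >> 1  =  3
 *   src_y += -3 >> 1 = -2                    (arithmetic shift, rounds towards -infinity)
 */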
2904
2905 static inline int hpel_motion_lowres(MpegEncContext *s,
2906                                   uint8_t *dest, uint8_t *src,
2907                                   int field_based, int field_select,
2908                                   int src_x, int src_y,
2909                                   int width, int height, int stride,
2910                                   int h_edge_pos, int v_edge_pos,
2911                                   int w, int h, h264_chroma_mc_func *pix_op,
2912                                   int motion_x, int motion_y)
2913 {
2914     const int lowres= s->avctx->lowres;
2915     const int s_mask= (2<<lowres)-1;
2916     int emu=0;
2917     int sx, sy;
2918
2919     if(s->quarter_sample){
2920         motion_x/=2;
2921         motion_y/=2;
2922     }
2923
2924     sx= motion_x & s_mask;
2925     sy= motion_y & s_mask;
2926     src_x += motion_x >> (lowres+1);
2927     src_y += motion_y >> (lowres+1);
2928
2929     src += src_y * stride + src_x;
2930
2931     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2932        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2933         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2934                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2935         src= s->edge_emu_buffer;
2936         emu=1;
2937     }
2938
2939     sx <<= 2 - lowres;
2940     sy <<= 2 - lowres;
2941     if(field_select)
2942         src += s->linesize;
2943     pix_op[lowres](dest, src, stride, h, sx, sy);
2944     return emu;
2945 }
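
/* The lowres variant keeps the fractional MV bits that the resolution
 * reduction frees up and hands them to the H.264-style bilinear MC, whose
 * sub-pel offsets live on a 1/8-pel grid. Worked example, assuming
 * lowres = 1 (half resolution) and motion_x = 13:
 *
 *   s_mask = (2<<1)-1 = 3      -> sx = 13 & 3 = 1
 *   src_x += 13 >> (1+1) = 3      (integer part on the lowres grid)
 *   sx <<= 2 - 1               -> sx = 2, i.e. 2/8 pel on the lowres image
 */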
2946
2947 /* apply one mpeg motion vector to the three components */
2948 static av_always_inline void mpeg_motion(MpegEncContext *s,
2949                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2950                                int field_based, int bottom_field, int field_select,
2951                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2952                                int motion_x, int motion_y, int h)
2953 {
2954     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2955     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2956
2957 #if 0
2958 if(s->quarter_sample)
2959 {
2960     motion_x>>=1;
2961     motion_y>>=1;
2962 }
2963 #endif
2964
2965     v_edge_pos = s->v_edge_pos >> field_based;
2966     linesize   = s->current_picture.linesize[0] << field_based;
2967     uvlinesize = s->current_picture.linesize[1] << field_based;
2968
2969     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2970     src_x = s->mb_x* 16               + (motion_x >> 1);
2971     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2972
2973     if (s->out_format == FMT_H263) {
2974         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2975             mx = (motion_x>>1)|(motion_x&1);
2976             my = motion_y >>1;
2977             uvdxy = ((my & 1) << 1) | (mx & 1);
2978             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2979             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2980         }else{
2981             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2982             uvsrc_x = src_x>>1;
2983             uvsrc_y = src_y>>1;
2984         }
2985     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
2986         mx = motion_x / 4;
2987         my = motion_y / 4;
2988         uvdxy = 0;
2989         uvsrc_x = s->mb_x*8 + mx;
2990         uvsrc_y = s->mb_y*8 + my;
2991     } else {
2992         if(s->chroma_y_shift){
2993             mx = motion_x / 2;
2994             my = motion_y / 2;
2995             uvdxy = ((my & 1) << 1) | (mx & 1);
2996             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2997             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2998         } else {
2999             if(s->chroma_x_shift){
3000             //Chroma422
3001                 mx = motion_x / 2;
3002                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
3003                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
3004                 uvsrc_y = src_y;
3005             } else {
3006             //Chroma444
3007                 uvdxy = dxy;
3008                 uvsrc_x = src_x;
3009                 uvsrc_y = src_y;
3010             }
3011         }
3012     }
3013
3014     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3015     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3016     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3017
3018     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3019        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
3020             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3021                s->codec_id == CODEC_ID_MPEG1VIDEO){
3022                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3023                 return ;
3024             }
3025             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3026                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3027             ptr_y = s->edge_emu_buffer;
3028             if(!(s->flags&CODEC_FLAG_GRAY)){
3029                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3030                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3031                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3032                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3033                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3034                 ptr_cb= uvbuf;
3035                 ptr_cr= uvbuf+16;
3036             }
3037     }
3038
3039     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3040         dest_y += s->linesize;
3041         dest_cb+= s->uvlinesize;
3042         dest_cr+= s->uvlinesize;
3043     }
3044
3045     if(field_select){
3046         ptr_y += s->linesize;
3047         ptr_cb+= s->uvlinesize;
3048         ptr_cr+= s->uvlinesize;
3049     }
3050
3051     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3052
3053     if(!(s->flags&CODEC_FLAG_GRAY)){
3054         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3055         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3056     }
3057 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3058     if(s->out_format == FMT_H261){
3059         ff_h261_loop_filter(s);
3060     }
3061 #endif
3062 }
3063
3064 /* apply one mpeg motion vector to the three components */
3065 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
3066                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3067                                int field_based, int bottom_field, int field_select,
3068                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3069                                int motion_x, int motion_y, int h)
3070 {
3071     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3072     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3073     const int lowres= s->avctx->lowres;
3074     const int block_s= 8>>lowres;
3075     const int s_mask= (2<<lowres)-1;
3076     const int h_edge_pos = s->h_edge_pos >> lowres;
3077     const int v_edge_pos = s->v_edge_pos >> lowres;
3078     linesize   = s->current_picture.linesize[0] << field_based;
3079     uvlinesize = s->current_picture.linesize[1] << field_based;
3080
3081     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3082         motion_x/=2;
3083         motion_y/=2;
3084     }
3085
3086     if(field_based){
3087         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3088     }
3089
3090     sx= motion_x & s_mask;
3091     sy= motion_y & s_mask;
3092     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3093     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3094
3095     if (s->out_format == FMT_H263) {
3096         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3097         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3098         uvsrc_x = src_x>>1;
3099         uvsrc_y = src_y>>1;
3100     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
3101         mx = motion_x / 4;
3102         my = motion_y / 4;
3103         uvsx = (2*mx) & s_mask;
3104         uvsy = (2*my) & s_mask;
3105         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3106         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3107     } else {
3108         mx = motion_x / 2;
3109         my = motion_y / 2;
3110         uvsx = mx & s_mask;
3111         uvsy = my & s_mask;
3112         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3113         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3114     }
3115
3116     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3117     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3118     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3119
3120     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3121        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3122             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3123                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3124             ptr_y = s->edge_emu_buffer;
3125             if(!(s->flags&CODEC_FLAG_GRAY)){
3126                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3127                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3128                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3129                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3130                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3131                 ptr_cb= uvbuf;
3132                 ptr_cr= uvbuf+16;
3133             }
3134     }
3135
3136     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3137         dest_y += s->linesize;
3138         dest_cb+= s->uvlinesize;
3139         dest_cr+= s->uvlinesize;
3140     }
3141
3142     if(field_select){
3143         ptr_y += s->linesize;
3144         ptr_cb+= s->uvlinesize;
3145         ptr_cr+= s->uvlinesize;
3146     }
3147
3148     sx <<= 2 - lowres;
3149     sy <<= 2 - lowres;
3150     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3151
3152     if(!(s->flags&CODEC_FLAG_GRAY)){
3153         uvsx <<= 2 - lowres;
3154         uvsy <<= 2 - lowres;
3155         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3156         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3157     }
3158     //FIXME h261 lowres loop filter
3159 }
3160
3161 //FIXME move to dsputil, avg variant, 16x16 version
3162 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3163     int x;
3164     uint8_t * const top   = src[1];
3165     uint8_t * const left  = src[2];
3166     uint8_t * const mid   = src[0];
3167     uint8_t * const right = src[3];
3168     uint8_t * const bottom= src[4];
3169 #define OBMC_FILTER(x, t, l, m, r, b)\
3170     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3171 #define OBMC_FILTER4(x, t, l, m, r, b)\
3172     OBMC_FILTER(x         , t, l, m, r, b);\
3173     OBMC_FILTER(x+1       , t, l, m, r, b);\
3174     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3175     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3176
3177     x=0;
3178     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3179     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3180     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3181     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3182     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3183     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3184     x+= stride;
3185     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3186     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3187     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3188     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3189     x+= stride;
3190     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3191     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3192     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3193     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3194     x+= 2*stride;
3195     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3196     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3197     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3198     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3199     x+= 2*stride;
3200     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3201     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3202     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3203     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3204     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3205     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3206     x+= stride;
3207     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3208     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3209     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3210     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3211 }
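
/* Each OBMC_FILTER() call above uses weights that sum to 8, so the "+4" and
 * ">>3" perform a rounded division by 8. For instance the four pixels of
 * OBMC_FILTER4(x+2, 2, 1, 5, 0, 0) are reconstructed as
 *
 *   dst = (2*top + 1*left + 5*mid + 4) >> 3
 *
 * i.e. 5/8 of the block's own prediction blended with 2/8 of the top and 1/8
 * of the left neighbour's prediction. */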
3212
3213 /* obmc for 1 8x8 luma block */
3214 static inline void obmc_motion(MpegEncContext *s,
3215                                uint8_t *dest, uint8_t *src,
3216                                int src_x, int src_y,
3217                                op_pixels_func *pix_op,
3218                                int16_t mv[5][2]/* mid top left right bottom*/)
3219 #define MID    0
3220 {
3221     int i;
3222     uint8_t *ptr[5];
3223
3224     assert(s->quarter_sample==0);
3225
3226     for(i=0; i<5; i++){
3227         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3228             ptr[i]= ptr[MID];
3229         }else{
3230             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3231             hpel_motion(s, ptr[i], src, 0, 0,
3232                         src_x, src_y,
3233                         s->width, s->height, s->linesize,
3234                         s->h_edge_pos, s->v_edge_pos,
3235                         8, 8, pix_op,
3236                         mv[i][0], mv[i][1]);
3237         }
3238     }
3239
3240     put_obmc(dest, ptr, s->linesize);
3241 }
3242
3243 static inline void qpel_motion(MpegEncContext *s,
3244                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3245                                int field_based, int bottom_field, int field_select,
3246                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3247                                qpel_mc_func (*qpix_op)[16],
3248                                int motion_x, int motion_y, int h)
3249 {
3250     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3251     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3252
3253     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3254     src_x = s->mb_x *  16                 + (motion_x >> 2);
3255     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3256
3257     v_edge_pos = s->v_edge_pos >> field_based;
3258     linesize = s->linesize << field_based;
3259     uvlinesize = s->uvlinesize << field_based;
3260
3261     if(field_based){
3262         mx= motion_x/2;
3263         my= motion_y>>1;
3264     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3265         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3266         mx= (motion_x>>1) + rtab[motion_x&7];
3267         my= (motion_y>>1) + rtab[motion_y&7];
3268     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3269         mx= (motion_x>>1)|(motion_x&1);
3270         my= (motion_y>>1)|(motion_y&1);
3271     }else{
3272         mx= motion_x/2;
3273         my= motion_y/2;
3274     }
3275     mx= (mx>>1)|(mx&1);
3276     my= (my>>1)|(my&1);
3277
3278     uvdxy= (mx&1) | ((my&1)<<1);
3279     mx>>=1;
3280     my>>=1;
3281
3282     uvsrc_x = s->mb_x *  8                 + mx;
3283     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3284
3285     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3286     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3287     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3288
3289     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3290        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3291         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3292                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3293         ptr_y= s->edge_emu_buffer;
3294         if(!(s->flags&CODEC_FLAG_GRAY)){
3295             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3296             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3297                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3298             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3299                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3300             ptr_cb= uvbuf;
3301             ptr_cr= uvbuf + 16;
3302         }
3303     }
3304
3305     if(!field_based)
3306         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3307     else{
3308         if(bottom_field){
3309             dest_y += s->linesize;
3310             dest_cb+= s->uvlinesize;
3311             dest_cr+= s->uvlinesize;
3312         }
3313
3314         if(field_select){
3315             ptr_y  += s->linesize;
3316             ptr_cb += s->uvlinesize;
3317             ptr_cr += s->uvlinesize;
3318         }
3319         //damn interlaced mode
3320         //FIXME boundary mirroring is not exactly correct here
3321         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3322         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3323     }
3324     if(!(s->flags&CODEC_FLAG_GRAY)){
3325         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3326         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3327     }
3328 }
3329
3330 inline int ff_h263_round_chroma(int x){
3331     if (x >= 0)
3332         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3333     else {
3334         x = -x;
3335         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3336     }
3337 }
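
/* The callers below pass the *sum* of the four luma half-pel MV components of
 * a 4MV macroblock, so the table-driven rounding above also folds in the
 * division by 8 (average of four vectors, then luma->chroma scaling).
 * Example: four identical luma MVs of 2 pels give a summed component of 16,
 * and ff_h263_round_chroma(16) = h263_chroma_roundtab[0] + ((16>>3) & ~1)
 * = 2 chroma half-pels = 1 chroma pel, as expected. */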
3338
3339 /**
3340  * H.263 chroma 4MV motion compensation.
3341  */
3342 static inline void chroma_4mv_motion(MpegEncContext *s,
3343                                      uint8_t *dest_cb, uint8_t *dest_cr,
3344                                      uint8_t **ref_picture,
3345                                      op_pixels_func *pix_op,
3346                                      int mx, int my){
3347     int dxy, emu=0, src_x, src_y, offset;
3348     uint8_t *ptr;
3349
3350     /* In case of 8X8, we construct a single chroma motion vector
3351        with a special rounding */
3352     mx= ff_h263_round_chroma(mx);
3353     my= ff_h263_round_chroma(my);
3354
3355     dxy = ((my & 1) << 1) | (mx & 1);
3356     mx >>= 1;
3357     my >>= 1;
3358
3359     src_x = s->mb_x * 8 + mx;
3360     src_y = s->mb_y * 8 + my;
3361     src_x = av_clip(src_x, -8, s->width/2);
3362     if (src_x == s->width/2)
3363         dxy &= ~1;
3364     src_y = av_clip(src_y, -8, s->height/2);
3365     if (src_y == s->height/2)
3366         dxy &= ~2;
3367
3368     offset = (src_y * (s->uvlinesize)) + src_x;
3369     ptr = ref_picture[1] + offset;
3370     if(s->flags&CODEC_FLAG_EMU_EDGE){
3371         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3372            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3373             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3374             ptr= s->edge_emu_buffer;
3375             emu=1;
3376         }
3377     }
3378     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3379
3380     ptr = ref_picture[2] + offset;
3381     if(emu){
3382         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3383         ptr= s->edge_emu_buffer;
3384     }
3385     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3386 }
3387
3388 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3389                                      uint8_t *dest_cb, uint8_t *dest_cr,
3390                                      uint8_t **ref_picture,
3391                                      h264_chroma_mc_func *pix_op,
3392                                      int mx, int my){
3393     const int lowres= s->avctx->lowres;
3394     const int block_s= 8>>lowres;
3395     const int s_mask= (2<<lowres)-1;
3396     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3397     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3398     int emu=0, src_x, src_y, offset, sx, sy;
3399     uint8_t *ptr;
3400
3401     if(s->quarter_sample){
3402         mx/=2;
3403         my/=2;
3404     }
3405
3406     /* In case of 8X8, we construct a single chroma motion vector
3407        with a special rounding */
3408     mx= ff_h263_round_chroma(mx);
3409     my= ff_h263_round_chroma(my);
3410
3411     sx= mx & s_mask;
3412     sy= my & s_mask;
3413     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3414     src_y = s->mb_y*block_s + (my >> (lowres+1));
3415
3416     offset = src_y * s->uvlinesize + src_x;
3417     ptr = ref_picture[1] + offset;
3418     if(s->flags&CODEC_FLAG_EMU_EDGE){
3419         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3420            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3421             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3422             ptr= s->edge_emu_buffer;
3423             emu=1;
3424         }
3425     }
3426     sx <<= 2 - lowres;
3427     sy <<= 2 - lowres;
3428     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3429
3430     ptr = ref_picture[2] + offset;
3431     if(emu){
3432         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3433         ptr= s->edge_emu_buffer;
3434     }
3435     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3436 }
3437
3438 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3439     /* fetch pixels for the estimated MV, 4 macroblocks ahead,
3440      * optimized for 64-byte cache lines */
3441     const int shift = s->quarter_sample ? 2 : 1;
3442     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3443     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3444     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3445     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3446     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3447     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3448 }
3449
3450 /**
3451  * motion compensation of a single macroblock
3452  * @param s context
3453  * @param dest_y luma destination pointer
3454  * @param dest_cb chroma cb/u destination pointer
3455  * @param dest_cr chroma cr/v destination pointer
3456  * @param dir direction (0->forward, 1->backward)
3457  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3458  * @param pix_op halfpel motion compensation function (average or put normally)
3459  * @param qpix_op qpel motion compensation function (average or put normally)
3460  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3461  */
3462 static inline void MPV_motion(MpegEncContext *s,
3463                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3464                               int dir, uint8_t **ref_picture,
3465                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3466 {
3467     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3468     int mb_x, mb_y, i;
3469     uint8_t *ptr, *dest;
3470
3471     mb_x = s->mb_x;
3472     mb_y = s->mb_y;
3473
3474     prefetch_motion(s, ref_picture, dir);
3475
3476     if(s->obmc && s->pict_type != B_TYPE){
3477         int16_t mv_cache[4][4][2];
3478         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3479         const int mot_stride= s->b8_stride;
3480         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3481
3482         assert(!s->mb_skipped);
3483
3484         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3485         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3486         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3487
3488         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3489             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3490         }else{
3491             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3492         }
3493
3494         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3495             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3496             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3497         }else{
3498             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3499             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3500         }
3501
3502         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3503             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3504             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3505         }else{
3506             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3507             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3508         }
3509
3510         mx = 0;
3511         my = 0;
3512         for(i=0;i<4;i++) {
3513             const int x= (i&1)+1;
3514             const int y= (i>>1)+1;
3515             int16_t mv[5][2]= {
3516                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3517                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3518                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3519                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3520                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3521             //FIXME cleanup
3522             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3523                         ref_picture[0],
3524                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3525                         pix_op[1],
3526                         mv);
3527
3528             mx += mv[0][0];
3529             my += mv[0][1];
3530         }
3531         if(!(s->flags&CODEC_FLAG_GRAY))
3532             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3533
3534         return;
3535     }
3536
3537     switch(s->mv_type) {
3538     case MV_TYPE_16X16:
3539         if(s->mcsel){
3540             if(s->real_sprite_warping_points==1){
3541                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3542                             ref_picture);
3543             }else{
3544                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3545                             ref_picture);
3546             }
3547         }else if(s->quarter_sample){
3548             qpel_motion(s, dest_y, dest_cb, dest_cr,
3549                         0, 0, 0,
3550                         ref_picture, pix_op, qpix_op,
3551                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3552         }else if(s->mspel){
3553             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3554                         ref_picture, pix_op,
3555                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3556         }else
3557         {
3558             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3559                         0, 0, 0,
3560                         ref_picture, pix_op,
3561                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3562         }
3563         break;
3564     case MV_TYPE_8X8:
3565         mx = 0;
3566         my = 0;
3567         if(s->quarter_sample){
3568             for(i=0;i<4;i++) {
3569                 motion_x = s->mv[dir][i][0];
3570                 motion_y = s->mv[dir][i][1];
3571
3572                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3573                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3574                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3575
3576                 /* WARNING: do not forget half pels */
3577                 src_x = av_clip(src_x, -16, s->width);
3578                 if (src_x == s->width)
3579                     dxy &= ~3;
3580                 src_y = av_clip(src_y, -16, s->height);
3581                 if (src_y == s->height)
3582                     dxy &= ~12;
3583
3584                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3585                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3586                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3587                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3588                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3589                         ptr= s->edge_emu_buffer;
3590                     }
3591                 }
3592                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3593                 qpix_op[1][dxy](dest, ptr, s->linesize);
3594
3595                 mx += s->mv[dir][i][0]/2;
3596                 my += s->mv[dir][i][1]/2;
3597             }
3598         }else{
3599             for(i=0;i<4;i++) {
3600                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3601                             ref_picture[0], 0, 0,
3602                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3603                             s->width, s->height, s->linesize,
3604                             s->h_edge_pos, s->v_edge_pos,
3605                             8, 8, pix_op[1],
3606                             s->mv[dir][i][0], s->mv[dir][i][1]);
3607
3608                 mx += s->mv[dir][i][0];
3609                 my += s->mv[dir][i][1];
3610             }
3611         }
3612
3613         if(!(s->flags&CODEC_FLAG_GRAY))
3614             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3615         break;
3616     case MV_TYPE_FIELD:
3617         if (s->picture_structure == PICT_FRAME) {
3618             if(s->quarter_sample){
3619                 for(i=0; i<2; i++){
3620                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3621                                 1, i, s->field_select[dir][i],
3622                                 ref_picture, pix_op, qpix_op,
3623                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3624                 }
3625             }else{
3626                 /* top field */
3627                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3628                             1, 0, s->field_select[dir][0],
3629                             ref_picture, pix_op,
3630                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3631                 /* bottom field */
3632                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3633                             1, 1, s->field_select[dir][1],
3634                             ref_picture, pix_op,
3635                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3636             }
3637         } else {
3638             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3639                 ref_picture= s->current_picture_ptr->data;
3640             }
3641
3642             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3643                         0, 0, s->field_select[dir][0],
3644                         ref_picture, pix_op,
3645                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3646         }
3647         break;
3648     case MV_TYPE_16X8:
3649         for(i=0; i<2; i++){
3650             uint8_t ** ref2picture;
3651
3652             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3653                 ref2picture= ref_picture;
3654             }else{
3655                 ref2picture= s->current_picture_ptr->data;
3656             }
3657
3658             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3659                         0, 0, s->field_select[dir][i],
3660                         ref2picture, pix_op,
3661                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3662
3663             dest_y += 16*s->linesize;
3664             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3665             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3666         }
3667         break;
3668     case MV_TYPE_DMV:
3669         if(s->picture_structure == PICT_FRAME){
3670             for(i=0; i<2; i++){
3671                 int j;
3672                 for(j=0; j<2; j++){
3673                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3674                                 1, j, j^i,
3675                                 ref_picture, pix_op,
3676                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3677                 }
3678                 pix_op = s->dsp.avg_pixels_tab;
3679             }
3680         }else{
3681             for(i=0; i<2; i++){
3682                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3683                             0, 0, s->picture_structure != i+1,
3684                             ref_picture, pix_op,
3685                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3686
3687                 // after the put we avg into the same block
3688                 pix_op=s->dsp.avg_pixels_tab;
3689
3690                 //opposite parity is always in the same frame if this is the second field
3691                 if(!s->first_field){
3692                     ref_picture = s->current_picture_ptr->data;
3693                 }
3694             }
3695         }
3696     break;
3697     default: assert(0);
3698     }
3699 }
3700
3701 /**
3702  * motion compensation of a single macroblock
3703  * @param s context
3704  * @param dest_y luma destination pointer
3705  * @param dest_cb chroma cb/u destination pointer
3706  * @param dest_cr chroma cr/v destination pointer
3707  * @param dir direction (0->forward, 1->backward)
3708  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3709  * @param pix_op motion compensation function (average or put normally)
3710  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3711  */
3712 static inline void MPV_motion_lowres(MpegEncContext *s,
3713                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3714                               int dir, uint8_t **ref_picture,
3715                               h264_chroma_mc_func *pix_op)
3716 {
3717     int mx, my;
3718     int mb_x, mb_y, i;
3719     const int lowres= s->avctx->lowres;
3720     const int block_s= 8>>lowres;
3721
3722     mb_x = s->mb_x;
3723     mb_y = s->mb_y;
3724
3725     switch(s->mv_type) {
3726     case MV_TYPE_16X16:
3727         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3728                     0, 0, 0,
3729                     ref_picture, pix_op,
3730                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3731         break;
3732     case MV_TYPE_8X8:
3733         mx = 0;
3734         my = 0;
3735             for(i=0;i<4;i++) {
3736                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3737                             ref_picture[0], 0, 0,
3738                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3739                             s->width, s->height, s->linesize,
3740                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3741                             block_s, block_s, pix_op,
3742                             s->mv[dir][i][0], s->mv[dir][i][1]);
3743
3744                 mx += s->mv[dir][i][0];
3745                 my += s->mv[dir][i][1];
3746             }
3747
3748         if(!(s->flags&CODEC_FLAG_GRAY))
3749             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3750         break;
3751     case MV_TYPE_FIELD:
3752         if (s->picture_structure == PICT_FRAME) {
3753             /* top field */
3754             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3755                         1, 0, s->field_select[dir][0],
3756                         ref_picture, pix_op,
3757                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3758             /* bottom field */
3759             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3760                         1, 1, s->field_select[dir][1],
3761                         ref_picture, pix_op,
3762                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3763         } else {
3764             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3765                 ref_picture= s->current_picture_ptr->data;
3766             }
3767
3768             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3769                         0, 0, s->field_select[dir][0],
3770                         ref_picture, pix_op,
3771                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3772         }
3773         break;
3774     case MV_TYPE_16X8:
3775         for(i=0; i<2; i++){
3776             uint8_t ** ref2picture;
3777
3778             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3779                 ref2picture= ref_picture;
3780             }else{
3781                 ref2picture= s->current_picture_ptr->data;
3782             }
3783
3784             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3785                         0, 0, s->field_select[dir][i],
3786                         ref2picture, pix_op,
3787                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3788
3789             dest_y += 2*block_s*s->linesize;
3790             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3791             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3792         }
3793         break;
3794     case MV_TYPE_DMV:
3795         if(s->picture_structure == PICT_FRAME){
3796             for(i=0; i<2; i++){
3797                 int j;
3798                 for(j=0; j<2; j++){
3799                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3800                                 1, j, j^i,
3801                                 ref_picture, pix_op,
3802                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3803                 }
3804                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3805             }
3806         }else{
3807             for(i=0; i<2; i++){
3808                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3809                             0, 0, s->picture_structure != i+1,
3810                             ref_picture, pix_op,
3811                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3812
3813                 // after the put we avg into the same block
3814                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3815
3816                 //opposite parity is always in the same frame if this is the second field
3817                 if(!s->first_field){
3818                     ref_picture = s->current_picture_ptr->data;
3819                 }
3820             }
3821         }
3822     break;
3823     default: assert(0);
3824     }
3825 }
3826
3827 /* put block[] to dest[] */
3828 static inline void put_dct(MpegEncContext *s,
3829                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3830 {
3831     s->dct_unquantize_intra(s, block, i, qscale);
3832     s->dsp.idct_put (dest, line_size, block);
3833 }
3834
3835 /* add block[] to dest[] */
3836 static inline void add_dct(MpegEncContext *s,
3837                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3838 {
3839     if (s->block_last_index[i] >= 0) {
3840         s->dsp.idct_add (dest, line_size, block);
3841     }
3842 }
3843
3844 static inline void add_dequant_dct(MpegEncContext *s,
3845                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3846 {
3847     if (s->block_last_index[i] >= 0) {
3848         s->dct_unquantize_inter(s, block, i, qscale);
3849
3850         s->dsp.idct_add (dest, line_size, block);
3851     }
3852 }
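
/* Reconstruction helpers: put_dct() is used for intra blocks (dequantize,
 * IDCT, overwrite), add_dct() adds an already dequantized residual on top of
 * the motion-compensated prediction, and add_dequant_dct() additionally runs
 * the inter dequantizer first, for the codecs whose bitstream decoder leaves
 * the coefficients quantized (see MPV_decode_mb_internal() below). */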
3853
3854 /**
3855  * cleans dc, ac, coded_block for the current non-intra MB
3856  */
3857 void ff_clean_intra_table_entries(MpegEncContext *s)
3858 {
3859     int wrap = s->b8_stride;
3860     int xy = s->block_index[0];
3861
3862     s->dc_val[0][xy           ] =
3863     s->dc_val[0][xy + 1       ] =
3864     s->dc_val[0][xy     + wrap] =
3865     s->dc_val[0][xy + 1 + wrap] = 1024;
3866     /* ac pred */
3867     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3868     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3869     if (s->msmpeg4_version>=3) {
3870         s->coded_block[xy           ] =
3871         s->coded_block[xy + 1       ] =
3872         s->coded_block[xy     + wrap] =
3873         s->coded_block[xy + 1 + wrap] = 0;
3874     }
3875     /* chroma */
3876     wrap = s->mb_stride;
3877     xy = s->mb_x + s->mb_y * wrap;
3878     s->dc_val[1][xy] =
3879     s->dc_val[2][xy] = 1024;
3880     /* ac pred */
3881     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3882     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3883
3884     s->mbintra_table[xy]= 0;
3885 }
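
/* The reset value 1024 corresponds to the default DC predictor for 8-bit
 * video (mid-grey 128 with the 3 extra bits of precision the predictors
 * carry), and the zeroed ac_val rows/columns disable AC prediction from this
 * macroblock; mbintra_table[] is cleared so the cleanup is not repeated. */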
3886
3887 /* generic function called after a macroblock has been parsed by the
3888    decoder or after it has been encoded by the encoder.
3889
3890    Important variables used:
3891    s->mb_intra : true if intra macroblock
3892    s->mv_dir   : motion vector direction
3893    s->mv_type  : motion vector type
3894    s->mv       : motion vector
3895    s->interlaced_dct : true if interlaced dct used (mpeg2)
3896  */
3897 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3898 {
3899     int mb_x, mb_y;
3900     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3901 #ifdef HAVE_XVMC
3902     if(s->avctx->xvmc_acceleration){
3903         XVMC_decode_mb(s);//xvmc uses pblocks
3904         return;
3905     }
3906 #endif
3907
3908     mb_x = s->mb_x;
3909     mb_y = s->mb_y;
3910
3911     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3912        /* save DCT coefficients */
3913        int i,j;
3914        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3915        for(i=0; i<6; i++)
3916            for(j=0; j<64; j++)
3917                *dct++ = block[i][s->dsp.idct_permutation[j]];
3918     }
3919
3920     s->current_picture.qscale_table[mb_xy]= s->qscale;
3921
3922     /* update DC predictors for P macroblocks */
3923     if (!s->mb_intra) {
3924         if (s->h263_pred || s->h263_aic) {
3925             if(s->mbintra_table[mb_xy])
3926                 ff_clean_intra_table_entries(s);
3927         } else {
3928             s->last_dc[0] =
3929             s->last_dc[1] =
3930             s->last_dc[2] = 128 << s->intra_dc_precision;
3931         }
3932     }
3933     else if (s->h263_pred || s->h263_aic)
3934         s->mbintra_table[mb_xy]=1;
3935
3936     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
3937         uint8_t *dest_y, *dest_cb, *dest_cr;
3938         int dct_linesize, dct_offset;
3939         op_pixels_func (*op_pix)[4];
3940         qpel_mc_func (*op_qpix)[16];
3941         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3942         const int uvlinesize= s->current_picture.linesize[1];
3943         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3944         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3945
3946         /* avoid copy if macroblock skipped in last frame too */
3947         /* skip only during decoding, as during encoding we might trash the buffers a bit */
3948         if(!s->encoding){
3949             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3950             const int age= s->current_picture.age;
3951
3952             assert(age);
3953
3954             if (s->mb_skipped) {
3955                 s->mb_skipped= 0;
3956                 assert(s->pict_type!=I_TYPE);
3957
3958                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3959                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3960
3961                 /* if previous was skipped too, then nothing to do !  */
3962                 if (*mbskip_ptr >= age && s->current_picture.reference){
3963                     return;
3964                 }
3965             } else if(!s->current_picture.reference){
3966                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3967                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3968             } else{
3969                 *mbskip_ptr = 0; /* not skipped */
3970             }
3971         }
3972
3973         dct_linesize = linesize << s->interlaced_dct;
3974         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3975
3976         if(readable){
3977             dest_y=  s->dest[0];
3978             dest_cb= s->dest[1];
3979             dest_cr= s->dest[2];
3980         }else{
3981             dest_y = s->b_scratchpad;
3982             dest_cb= s->b_scratchpad+16*linesize;
3983             dest_cr= s->b_scratchpad+32*linesize;
3984         }
3985
3986         if (!s->mb_intra) {
3987             /* motion handling */
3988             /* decoding or more than one mb_type (MC was already done otherwise) */
3989             if(!s->encoding){
3990                 if(lowres_flag){
3991                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3992
3993                     if (s->mv_dir & MV_DIR_FORWARD) {
3994                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3995                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3996                     }
3997                     if (s->mv_dir & MV_DIR_BACKWARD) {
3998                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3999                     }
4000                 }else{
4001                     op_qpix= s->me.qpel_put;
4002                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
4003                         op_pix = s->dsp.put_pixels_tab;
4004                     }else{
4005                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4006                     }
4007                     if (s->mv_dir & MV_DIR_FORWARD) {
4008                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4009                         op_pix = s->dsp.avg_pixels_tab;
4010                         op_qpix= s->me.qpel_avg;
4011                     }
4012                     if (s->mv_dir & MV_DIR_BACKWARD) {
4013                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4014                     }
4015                 }
4016             }
4017
4018             /* skip dequant / idct if we are really late ;) */
4019             if(s->hurry_up>1) goto skip_idct;
4020             if(s->avctx->skip_idct){
4021                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4022                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4023                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4024                     goto skip_idct;
4025             }
4026
4027             /* add dct residue */
4028             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4029                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4030                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4031                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4032                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4033                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4034
4035                 if(!(s->flags&CODEC_FLAG_GRAY)){
4036                     if (s->chroma_y_shift){
4037                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4038                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4039                     }else{
4040                         dct_linesize >>= 1;
4041                         dct_offset >>=1;
4042                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4043                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4044                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4045                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4046                     }
4047                 }
4048             } else if(s->codec_id != CODEC_ID_WMV2){
4049                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4050                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4051                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4052                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4053
4054                 if(!(s->flags&CODEC_FLAG_GRAY)){
4055                     if(s->chroma_y_shift){//Chroma420
4056                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4057                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4058                     }else{
4059                         //chroma422
4060                         dct_linesize = uvlinesize << s->interlaced_dct;
4061                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4062
4063                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4064                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4065                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4066                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4067                         if(!s->chroma_x_shift){//Chroma444
4068                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4069                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4070                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4071                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4072                         }
4073                     }
4074                 }//fi gray
4075             }
4076             else{
4077                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4078             }
4079         } else {
4080             /* dct only in intra block */
4081             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4082                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4083                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4084                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4085                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4086
4087                 if(!(s->flags&CODEC_FLAG_GRAY)){
4088                     if(s->chroma_y_shift){
4089                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4090                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4091                     }else{
4092                         dct_offset >>=1;
4093                         dct_linesize >>=1;
4094                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4095                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4096                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4097                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4098                     }
4099                 }
4100             }else{
4101                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4102                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4103                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4104                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4105
4106                 if(!(s->flags&CODEC_FLAG_GRAY)){
4107                     if(s->chroma_y_shift){
4108                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4109                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4110                     }else{
4111
4112                         dct_linesize = uvlinesize << s->interlaced_dct;
4113                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4114
4115                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4116                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4117                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4118                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4119                         if(!s->chroma_x_shift){//Chroma444
4120                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4121                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4122                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4123                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4124                         }
4125                     }
4126                 }//gray
4127             }
4128         }
4129 skip_idct:
4130         if(!readable){
4131             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4132             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4133             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4134         }
4135     }
4136 }
4137
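/**
 * Reconstruct one macroblock into the current picture, dispatching to the
 * lowres or full-resolution variant of MPV_decode_mb_internal().
 */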
4138 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4139     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4140     else                  MPV_decode_mb_internal(s, block, 0);
4141 }
4142
4143 #ifdef CONFIG_ENCODERS
4144
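/**
 * Zero an almost empty block of quantized coefficients.
 * Every coefficient with |level| == 1 adds tab[run] to a score, where run is
 * the number of zero coefficients since the previous nonzero one (e.g. a lone
 * +-1 directly after another nonzero coefficient costs 3, while one preceded
 * by 24 or more zeros costs nothing); any |level| > 1 keeps the block as is.
 * If the score stays below the threshold, the block is cleared. A negative
 * threshold additionally allows the intra DC coefficient to be eliminated.
 */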
4145 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4146 {
4147     static const char tab[64]=
4148         {3,2,2,1,1,1,1,1,
4149          1,1,1,1,1,1,1,1,
4150          1,1,1,1,1,1,1,1,
4151          0,0,0,0,0,0,0,0,
4152          0,0,0,0,0,0,0,0,
4153          0,0,0,0,0,0,0,0,
4154          0,0,0,0,0,0,0,0,
4155          0,0,0,0,0,0,0,0};
4156     int score=0;
4157     int run=0;
4158     int i;
4159     DCTELEM *block= s->block[n];
4160     const int last_index= s->block_last_index[n];
4161     int skip_dc;
4162
4163     if(threshold<0){
4164         skip_dc=0;
4165         threshold= -threshold;
4166     }else
4167         skip_dc=1;
4168
4169     /* are all the coefficients which we could set to zero already zero? */
4170     if(last_index<=skip_dc - 1) return;
4171
4172     for(i=0; i<=last_index; i++){
4173         const int j = s->intra_scantable.permutated[i];
4174         const int level = FFABS(block[j]);
4175         if(level==1){
4176             if(skip_dc && i==0) continue;
4177             score+= tab[run];
4178             run=0;
4179         }else if(level>1){
4180             return;
4181         }else{
4182             run++;
4183         }
4184     }
4185     if(score >= threshold) return;
4186     for(i=skip_dc; i<=last_index; i++){
4187         const int j = s->intra_scantable.permutated[i];
4188         block[j]=0;
4189     }
4190     if(block[0]) s->block_last_index[n]= 0;
4191     else         s->block_last_index[n]= -1;
4192 }
4193
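/**
 * Clamp the quantized coefficients to the codec's [min_qcoeff, max_qcoeff]
 * range (the intra DC coefficient is left untouched) and warn about the
 * number of clipped coefficients when simple macroblock decision is used.
 */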
4194 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4195 {
4196     int i;
4197     const int maxlevel= s->max_qcoeff;
4198     const int minlevel= s->min_qcoeff;
4199     int overflow=0;
4200
4201     if(s->mb_intra){
4202         i=1; //skip clipping of intra dc
4203     }else
4204         i=0;
4205
4206     for(;i<=last_index; i++){
4207         const int j= s->intra_scantable.permutated[i];
4208         int level = block[j];
4209
4210         if     (level>maxlevel){
4211             level=maxlevel;
4212             overflow++;
4213         }else if(level<minlevel){
4214             level=minlevel;
4215             overflow++;
4216         }
4217
4218         block[j]= level;
4219     }
4220
4221     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4222         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4223 }
4224
4225 #endif //CONFIG_ENCODERS
4226
4227 /**
4228  * Call the draw_horiz_band() callback of the AVCodecContext, if one is set.
4229  * @param h normal height; it is reduced automatically if needed for the last row
4230  */
4231 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4232     if (s->avctx->draw_horiz_band) {
4233         AVFrame *src;
4234         int offset[4];
4235
4236         if(s->picture_structure != PICT_FRAME){
4237             h <<= 1;
4238             y <<= 1;
4239             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4240         }
4241
4242         h= FFMIN(h, s->avctx->height - y);
4243
4244         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4245             src= (AVFrame*)s->current_picture_ptr;
4246         else if(s->last_picture_ptr)
4247             src= (AVFrame*)s->last_picture_ptr;
4248         else
4249             return;
4250
4251         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4252             offset[0]=
4253             offset[1]=
4254             offset[2]=
4255             offset[3]= 0;
4256         }else{
4257             offset[0]= y * s->linesize;
4258             offset[1]=
4259             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4260             offset[3]= 0;
4261         }
4262
4263         emms_c();
4264
4265         s->avctx->draw_horiz_band(s->avctx, src, offset,
4266                                   y, s->picture_structure, h);
4267     }
4268 }
4269
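/**
 * Initialize block_index[] and the dest[] pointers for the current macroblock
 * position: block_index[0..3] address the four luma 8x8 blocks and [4]/[5] the
 * chroma blocks, while dest[0..2] are set up one macroblock to the left so
 * that ff_update_block_index() steps them onto the current macroblock
 * (lowres and the chroma subsampling shifts are taken into account).
 */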
4270 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4271     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4272     const int uvlinesize= s->current_picture.linesize[1];
4273     const int mb_size= 4 - s->avctx->lowres;
4274
4275     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4276     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4277     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4278     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4279     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4280     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4281     //block_index is not used by mpeg2, so it is not affected by chroma_format
4282
4283     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4284     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4285     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4286
4287     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4288     {
4289         s->dest[0] += s->mb_y *   linesize << mb_size;
4290         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4291         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4292     }
4293 }
4294
4295 #ifdef CONFIG_ENCODERS
4296
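/**
 * Compute a perceptual weight for every pixel of an 8x8 block: 36 times the
 * standard deviation of its 3x3 neighbourhood (clipped at the block borders).
 * The weights are fed to dct_quantize_refine() when quantizer noise shaping
 * is enabled.
 */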
4297 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4298     int x, y;
4299 //FIXME optimize
4300     for(y=0; y<8; y++){
4301         for(x=0; x<8; x++){
4302             int x2, y2;
4303             int sum=0;
4304             int sqr=0;
4305             int count=0;
4306
4307             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4308                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4309                     int v= ptr[x2 + y2*stride];
4310                     sum += v;
4311                     sqr += v*v;
4312                     count++;
4313                 }
4314             }
4315             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4316         }
4317     }
4318 }
4319
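/**
 * Encode one macroblock: grab the source pixels (and the motion-compensated
 * prediction for inter macroblocks), optionally select interlaced DCT,
 * forward-transform and quantize the blocks (with optional noise shaping and
 * single-coefficient elimination), then emit the macroblock with the
 * codec-specific entropy coder.
 * @param mb_block_height 8 for 4:2:0 chroma, 16 otherwise
 * @param mb_block_count  6 for 4:2:0 chroma, 8 otherwise
 */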
4320 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4321 {
4322     int16_t weight[8][64];
4323     DCTELEM orig[8][64];
4324     const int mb_x= s->mb_x;
4325     const int mb_y= s->mb_y;
4326     int i;
4327     int skip_dct[8];
4328     int dct_offset   = s->linesize*8; //default for progressive frames
4329     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4330     int wrap_y, wrap_c;
4331
4332     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4333
4334     if(s->adaptive_quant){
4335         const int last_qp= s->qscale;
4336         const int mb_xy= mb_x + mb_y*s->mb_stride;
4337
4338         s->lambda= s->lambda_table[mb_xy];
4339         update_qscale(s);
4340
4341         if(!(s->flags&CODEC_FLAG_QP_RD)){
4342             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4343             s->dquant= s->qscale - last_qp;
4344
4345             if(s->out_format==FMT_H263){
4346                 s->dquant= av_clip(s->dquant, -2, 2);
4347
4348                 if(s->codec_id==CODEC_ID_MPEG4){
4349                     if(!s->mb_intra){
4350                         if(s->pict_type == B_TYPE){
4351                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4352                                 s->dquant= 0;
4353                         }
4354                         if(s->mv_type==MV_TYPE_8X8)
4355                             s->dquant=0;
4356                     }
4357                 }
4358             }
4359         }
4360         ff_set_qscale(s, last_qp + s->dquant);
4361     }else if(s->flags&CODEC_FLAG_QP_RD)
4362         ff_set_qscale(s, s->qscale + s->dquant);
4363
4364     wrap_y = s->linesize;
4365     wrap_c = s->uvlinesize;
4366     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4367     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4368     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4369
4370     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4371         uint8_t *ebuf= s->edge_emu_buffer + 32;
4372         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4373         ptr_y= ebuf;
4374         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4375         ptr_cb= ebuf+18*wrap_y;
4376         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4377         ptr_cr= ebuf+18*wrap_y+8;
4378     }
4379
4380     if (s->mb_intra) {
4381         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4382             int progressive_score, interlaced_score;
4383
4384             s->interlaced_dct=0;
4385             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4386                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4387
4388             if(progressive_score > 0){
4389                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4390                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4391                 if(progressive_score > interlaced_score){
4392                     s->interlaced_dct=1;
4393
4394                     dct_offset= wrap_y;
4395                     wrap_y<<=1;
4396                     if (s->chroma_format == CHROMA_422)
4397                         wrap_c<<=1;
4398                 }
4399             }
4400         }
4401
4402         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4403         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4404         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4405         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4406
4407         if(s->flags&CODEC_FLAG_GRAY){
4408             skip_dct[4]= 1;
4409             skip_dct[5]= 1;
4410         }else{
4411             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4412             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4413             if(!s->chroma_y_shift){ /* 422 */
4414                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4415                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4416             }
4417         }
4418     }else{
4419         op_pixels_func (*op_pix)[4];
4420         qpel_mc_func (*op_qpix)[16];
4421         uint8_t *dest_y, *dest_cb, *dest_cr;
4422
4423         dest_y  = s->dest[0];
4424         dest_cb = s->dest[1];
4425         dest_cr = s->dest[2];
4426
4427         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4428             op_pix = s->dsp.put_pixels_tab;
4429             op_qpix= s->dsp.put_qpel_pixels_tab;
4430         }else{
4431             op_pix = s->dsp.put_no_rnd_pixels_tab;
4432             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4433         }
4434
4435         if (s->mv_dir & MV_DIR_FORWARD) {
4436             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4437             op_pix = s->dsp.avg_pixels_tab;
4438             op_qpix= s->dsp.avg_qpel_pixels_tab;
4439         }
4440         if (s->mv_dir & MV_DIR_BACKWARD) {
4441             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4442         }
4443
4444         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4445             int progressive_score, interlaced_score;
4446
4447             s->interlaced_dct=0;
4448             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4449                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4450
4451             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4452
4453             if(progressive_score>0){
4454                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4455                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4456
4457                 if(progressive_score > interlaced_score){
4458                     s->interlaced_dct=1;
4459
4460                     dct_offset= wrap_y;
4461                     wrap_y<<=1;
4462                     if (s->chroma_format == CHROMA_422)
4463                         wrap_c<<=1;
4464                 }
4465             }
4466         }
4467
4468         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4469         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4470         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4471         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4472
4473         if(s->flags&CODEC_FLAG_GRAY){
4474             skip_dct[4]= 1;
4475             skip_dct[5]= 1;
4476         }else{
4477             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4478             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4479             if(!s->chroma_y_shift){ /* 422 */
4480                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4481                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4482             }
4483         }
4484         /* pre quantization */
4485         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4486             //FIXME optimize
4487             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4488             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4489             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4490             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4491             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4492             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4493             if(!s->chroma_y_shift){ /* 422 */
4494                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4495                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4496             }
4497         }
4498     }
4499
4500     if(s->avctx->quantizer_noise_shaping){
4501         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4502         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4503         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4504         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4505         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4506         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4507         if(!s->chroma_y_shift){ /* 422 */
4508             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4509             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4510         }
4511         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4512     }
4513
4514     /* DCT & quantize */
4515     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4516     {
4517         for(i=0;i<mb_block_count;i++) {
4518             if(!skip_dct[i]){
4519                 int overflow;
4520                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4521             // FIXME we could decide to change the quantizer instead of clipping
4522             // JS: I don't think that would be a good idea, it could lower quality instead
4523             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
4524                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4525             }else
4526                 s->block_last_index[i]= -1;
4527         }
4528         if(s->avctx->quantizer_noise_shaping){
4529             for(i=0;i<mb_block_count;i++) {
4530                 if(!skip_dct[i]){
4531                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4532                 }
4533             }
4534         }
4535
4536         if(s->luma_elim_threshold && !s->mb_intra)
4537             for(i=0; i<4; i++)
4538                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4539         if(s->chroma_elim_threshold && !s->mb_intra)
4540             for(i=4; i<mb_block_count; i++)
4541                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4542
4543         if(s->flags & CODEC_FLAG_CBP_RD){
4544             for(i=0;i<mb_block_count;i++) {
4545                 if(s->block_last_index[i] == -1)
4546                     s->coded_score[i]= INT_MAX/256;
4547             }
4548         }
4549     }
4550
4551     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4552         s->block_last_index[4]=
4553         s->block_last_index[5]= 0;
4554         s->block[4][0]=
4555         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4556     }
4557
4558     //FIXME: the non-C quantize code returns an incorrect block_last_index
4559     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4560         for(i=0; i<mb_block_count; i++){
4561             int j;
4562             if(s->block_last_index[i]>0){
4563                 for(j=63; j>0; j--){
4564                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4565                 }
4566                 s->block_last_index[i]= j;
4567             }
4568         }
4569     }
4570
4571     /* huffman encode */
4572     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4573     case CODEC_ID_MPEG1VIDEO:
4574     case CODEC_ID_MPEG2VIDEO:
4575         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4576     case CODEC_ID_MPEG4:
4577         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4578     case CODEC_ID_MSMPEG4V2:
4579     case CODEC_ID_MSMPEG4V3:
4580     case CODEC_ID_WMV1:
4581         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4582     case CODEC_ID_WMV2:
4583          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4584 #ifdef CONFIG_H261_ENCODER
4585     case CODEC_ID_H261:
4586         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4587 #endif
4588     case CODEC_ID_H263:
4589     case CODEC_ID_H263P:
4590     case CODEC_ID_FLV1:
4591     case CODEC_ID_RV10:
4592     case CODEC_ID_RV20:
4593         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4594     case CODEC_ID_MJPEG:
4595         mjpeg_encode_mb(s, s->block); break;
4596     default:
4597         assert(0);
4598     }
4599 }
4600
4601 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4602 {
4603     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4604     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4605 }
4606
4607 #endif //CONFIG_ENCODERS
4608
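/**
 * Release all internally allocated or user-supplied picture buffers and reset
 * the parse context and bitstream buffer, e.g. when the decoder is flushed.
 */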
4609 void ff_mpeg_flush(AVCodecContext *avctx){
4610     int i;
4611     MpegEncContext *s = avctx->priv_data;
4612
4613     if(s==NULL || s->picture==NULL)
4614         return;
4615
4616     for(i=0; i<MAX_PICTURE_COUNT; i++){
4617        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4618                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4619         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4620     }
4621     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4622
4623     s->mb_x= s->mb_y= 0;
4624
4625     s->parse_context.state= -1;
4626     s->parse_context.frame_start_found= 0;
4627     s->parse_context.overread= 0;
4628     s->parse_context.overread_index= 0;
4629     s->parse_context.index= 0;
4630     s->parse_context.last_index= 0;
4631     s->bitstream_buffer_size=0;
4632     s->pp_time=0;
4633 }
4634
4635 #ifdef CONFIG_ENCODERS
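/**
 * Append the first @p length bits of @p src to the PutBitContext.
 * Short or unaligned copies go through put_bits() 16 bits at a time;
 * otherwise the output is aligned to a 32-bit boundary first and the bulk
 * is copied with memcpy().
 *
 * A minimal usage sketch (buf, buf_size, header and header_bits are
 * hypothetical names, not part of this file):
 * @code
 *   PutBitContext pb;
 *   init_put_bits(&pb, buf, buf_size);
 *   ff_copy_bits(&pb, header, header_bits); // length is given in bits
 * @endcode
 */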
4636 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4637 {
4638     const uint16_t *srcw= (uint16_t*)src;
4639     int words= length>>4;
4640     int bits= length&15;
4641     int i;
4642
4643     if(length==0) return;
4644
4645     if(words < 16){
4646         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4647     }else if(put_bits_count(pb)&7){
4648         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4649     }else{
4650         for(i=0; put_bits_count(pb)&31; i++)
4651             put_bits(pb, 8, src[i]);
4652         flush_put_bits(pb);
4653         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4654         skip_put_bytes(pb, 2*words-i);
4655     }
4656
4657     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4658 }
4659
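/**
 * copy_context_before_encode() / copy_context_after_encode() save and restore
 * the parts of MpegEncContext that the macroblock encoder modifies (last MVs,
 * DC predictors, bit statistics, PutBitContexts, ...) so that encode_mb_hq()
 * can try several candidate macroblock types and keep only the best one.
 */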
4660 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4661     int i;
4662
4663     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4664
4665     /* mpeg1 */
4666     d->mb_skip_run= s->mb_skip_run;
4667     for(i=0; i<3; i++)
4668         d->last_dc[i]= s->last_dc[i];
4669
4670     /* statistics */
4671     d->mv_bits= s->mv_bits;
4672     d->i_tex_bits= s->i_tex_bits;
4673     d->p_tex_bits= s->p_tex_bits;
4674     d->i_count= s->i_count;
4675     d->f_count= s->f_count;
4676     d->b_count= s->b_count;
4677     d->skip_count= s->skip_count;
4678     d->misc_bits= s->misc_bits;
4679     d->last_bits= 0;
4680
4681     d->mb_skipped= 0;
4682     d->qscale= s->qscale;
4683     d->dquant= s->dquant;
4684 }
4685
4686 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4687     int i;
4688
4689     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4690     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4691
4692     /* mpeg1 */
4693     d->mb_skip_run= s->mb_skip_run;
4694     for(i=0; i<3; i++)
4695         d->last_dc[i]= s->last_dc[i];
4696
4697     /* statistics */
4698     d->mv_bits= s->mv_bits;
4699     d->i_tex_bits= s->i_tex_bits;
4700     d->p_tex_bits= s->p_tex_bits;
4701     d->i_count= s->i_count;
4702     d->f_count= s->f_count;
4703     d->b_count= s->b_count;
4704     d->skip_count= s->skip_count;
4705     d->misc_bits= s->misc_bits;
4706
4707     d->mb_intra= s->mb_intra;
4708     d->mb_skipped= s->mb_skipped;
4709     d->mv_type= s->mv_type;
4710     d->mv_dir= s->mv_dir;
4711     d->pb= s->pb;
4712     if(s->data_partitioning){
4713         d->pb2= s->pb2;
4714         d->tex_pb= s->tex_pb;
4715     }
4716     d->block= s->block;
4717     for(i=0; i<8; i++)
4718         d->block_last_index[i]= s->block_last_index[i];
4719     d->interlaced_dct= s->interlaced_dct;
4720     d->qscale= s->qscale;
4721 }
4722
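/**
 * Trial-encode the current macroblock as the given candidate type into one of
 * two scratch bitstreams, score it by its bit count or, in
 * FF_MB_DECISION_RD mode, by a rate-distortion cost that also includes the
 * SSE of the reconstructed macroblock, and keep the attempt if its score is
 * below *dmin.
 */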
4723 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4724                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4725                            int *dmin, int *next_block, int motion_x, int motion_y)
4726 {
4727     int score;
4728     uint8_t *dest_backup[3];
4729
4730     copy_context_before_encode(s, backup, type);
4731
4732     s->block= s->blocks[*next_block];
4733     s->pb= pb[*next_block];
4734     if(s->data_partitioning){
4735         s->pb2   = pb2   [*next_block];
4736         s->tex_pb= tex_pb[*next_block];
4737     }
4738
4739     if(*next_block){
4740         memcpy(dest_backup, s->dest, sizeof(s->dest));
4741         s->dest[0] = s->rd_scratchpad;
4742         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4743         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4744         assert(s->linesize >= 32); //FIXME
4745     }
4746
4747     encode_mb(s, motion_x, motion_y);
4748
4749     score= put_bits_count(&s->pb);
4750     if(s->data_partitioning){
4751         score+= put_bits_count(&s->pb2);
4752         score+= put_bits_count(&s->tex_pb);
4753     }
4754
4755     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4756         MPV_decode_mb(s, s->block);
4757
4758         score *= s->lambda2;
4759         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4760     }
4761
4762     if(*next_block){
4763         memcpy(s->dest, dest_backup, sizeof(s->dest));
4764     }
4765
4766     if(score<*dmin){
4767         *dmin= score;
4768         *next_block^=1;
4769
4770         copy_context_after_encode(best, s, type);
4771     }
4772 }
4773
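/**
 * Sum of squared errors between two w x h pixel blocks, with fast DSP paths
 * for the common 16x16 and 8x8 sizes.
 */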
4774 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4775     uint32_t *sq = ff_squareTbl + 256;
4776     int acc=0;
4777     int x,y;
4778
4779     if(w==16 && h==16)
4780         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4781     else if(w==8 && h==8)
4782         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4783
4784     for(y=0; y<h; y++){
4785         for(x=0; x<w; x++){
4786             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4787         }
4788     }
4789
4790     assert(acc>=0);
4791
4792     return acc;
4793 }
4794
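/**
 * Distortion of the current macroblock against the source picture: NSSE if
 * mb_cmp requests it, otherwise SSE; partial macroblocks at the right and
 * bottom borders always use the plain C SSE.
 */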
4795 static int sse_mb(MpegEncContext *s){
4796     int w= 16;
4797     int h= 16;
4798
4799     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4800     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4801
4802     if(w==16 && h==16)
4803       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4804         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4805                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4806                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4807       }else{
4808         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4809                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4810                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4811       }
4812     else
4813         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4814                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4815                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4816 }
4817
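/**
 * Slice thread entry point for the motion estimation pre-pass: scans the
 * slice bottom-up and right-to-left using the pre_dia_size search diamond.
 */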
4818 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4819     MpegEncContext *s= arg;
4820
4821
4822     s->me.pre_pass=1;
4823     s->me.dia_size= s->avctx->pre_dia_size;
4824     s->first_slice_line=1;
4825     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4826         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4827             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4828         }
4829         s->first_slice_line=0;
4830     }
4831
4832     s->me.pre_pass=0;
4833
4834     return 0;
4835 }
4836
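/**
 * Slice thread entry point for the main motion estimation pass: estimates
 * P- or B-frame motion vectors and macroblock types for every macroblock
 * of the slice.
 */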
4837 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4838     MpegEncContext *s= arg;
4839
4840     ff_check_alignment();
4841
4842     s->me.dia_size= s->avctx->dia_size;
4843     s->first_slice_line=1;
4844     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4845         s->mb_x=0; //for block init below
4846         ff_init_block_index(s);
4847         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4848             s->block_index[0]+=2;
4849             s->block_index[1]+=2;
4850             s->block_index[2]+=2;
4851             s->block_index[3]+=2;
4852
4853             /* compute motion vector & mb_type and store in context */
4854             if(s->pict_type==B_TYPE)
4855                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4856             else
4857                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4858         }
4859         s->first_slice_line=0;
4860     }
4861     return 0;
4862 }
4863
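/**
 * Slice thread entry point that computes the luma variance and mean of every
 * macroblock, stores them in current_picture.mb_var/mb_mean and accumulates
 * the slice total in me.mb_var_sum_temp.
 */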
4864 static int mb_var_thread(AVCodecContext *c, void *arg){
4865     MpegEncContext *s= arg;
4866     int mb_x, mb_y;
4867
4868     ff_check_alignment();
4869
4870     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4871         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4872             int xx = mb_x * 16;
4873             int yy = mb_y * 16;
4874             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4875             int varc;
4876             int sum = s->dsp.pix_sum(pix, s->linesize);
4877
4878             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4879
4880             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4881             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4882             s->me.mb_var_sum_temp    += varc;
4883         }
4884     }
4885     return 0;
4886 }
4887
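/**
 * Finish the current slice: merge MPEG-4 data partitions, write stuffing bits
 * for MPEG-4 or MJPEG, byte-align and flush the bitstream, and update the
 * pass-1 statistics.
 */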
4888 static void write_slice_end(MpegEncContext *s){
4889     if(s->codec_id==CODEC_ID_MPEG4){
4890         if(s->partitioned_frame){
4891             ff_mpeg4_merge_partitions(s);
4892         }
4893
4894         ff_mpeg4_stuffing(&s->pb);
4895     }else if(s->out_format == FMT_MJPEG){
4896         ff_mjpeg_stuffing(&s->pb);
4897     }
4898
4899     align_put_bits(&s->pb);
4900     flush_put_bits(&s->pb);
4901
4902     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4903         s->misc_bits+= get_bits_diff(s);
4904 }
4905
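/**
 * Slice thread entry point for the main encoding loop: walks the macroblocks
 * of the slice, emits resync / GOB / slice headers where required, and either
 * trial-encodes all candidate macroblock types with encode_mb_hq() or
 * directly encodes the single possible type.
 */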
4906 static int encode_thread(AVCodecContext *c, void *arg){
4907     MpegEncContext *s= arg;
4908     int mb_x, mb_y, pdif = 0;
4909     int i, j;
4910     MpegEncContext best_s, backup_s;
4911     uint8_t bit_buf[2][MAX_MB_BYTES];
4912     uint8_t bit_buf2[2][MAX_MB_BYTES];
4913     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4914     PutBitContext pb[2], pb2[2], tex_pb[2];
4915 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4916
4917     ff_check_alignment();
4918
4919     for(i=0; i<2; i++){
4920         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4921         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4922         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4923     }
4924
4925     s->last_bits= put_bits_count(&s->pb);
4926     s->mv_bits=0;
4927     s->misc_bits=0;
4928     s->i_tex_bits=0;
4929     s->p_tex_bits=0;
4930     s->i_count=0;
4931     s->f_count=0;
4932     s->b_count=0;
4933     s->skip_count=0;
4934
4935     for(i=0; i<3; i++){
4936         /* init last dc values */
4937         /* note: quant matrix value (8) is implied here */
4938         s->last_dc[i] = 128 << s->intra_dc_precision;
4939
4940         s->current_picture.error[i] = 0;
4941     }
4942     s->mb_skip_run = 0;
4943     memset(s->last_mv, 0, sizeof(s->last_mv));
4944
4945     s->last_mv_dir = 0;
4946
4947     switch(s->codec_id){
4948     case CODEC_ID_H263:
4949     case CODEC_ID_H263P:
4950     case CODEC_ID_FLV1:
4951         s->gob_index = ff_h263_get_gob_height(s);
4952         break;
4953     case CODEC_ID_MPEG4:
4954         if(s->partitioned_frame)
4955             ff_mpeg4_init_partitions(s);
4956         break;
4957     }
4958
4959     s->resync_mb_x=0;
4960     s->resync_mb_y=0;
4961     s->first_slice_line = 1;
4962     s->ptr_lastgob = s->pb.buf;
4963     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4964 //    printf("row %d at %X\n", s->mb_y, (int)s);
4965         s->mb_x=0;
4966         s->mb_y= mb_y;
4967
4968         ff_set_qscale(s, s->qscale);
4969         ff_init_block_index(s);
4970
4971         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4972             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4973             int mb_type= s->mb_type[xy];
4974 //            int d;
4975             int dmin= INT_MAX;
4976             int dir;
4977
4978             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4979                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4980                 return -1;
4981             }
4982             if(s->data_partitioning){
4983                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4984                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4985                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4986                     return -1;
4987                 }
4988             }
4989
4990             s->mb_x = mb_x;
4991             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4992             ff_update_block_index(s);
4993
4994 #ifdef CONFIG_H261_ENCODER
4995             if(s->codec_id == CODEC_ID_H261){
4996                 ff_h261_reorder_mb_index(s);
4997                 xy= s->mb_y*s->mb_stride + s->mb_x;
4998                 mb_type= s->mb_type[xy];
4999             }
5000 #endif
5001
5002             /* write gob / video packet header  */
5003             if(s->rtp_mode){
5004                 int current_packet_size, is_gob_start;
5005
5006                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5007
5008                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5009
5010                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5011
5012                 switch(s->codec_id){
5013                 case CODEC_ID_H263:
5014                 case CODEC_ID_H263P:
5015                     if(!s->h263_slice_structured)
5016                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5017                     break;
5018                 case CODEC_ID_MPEG2VIDEO:
5019                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
5020                 case CODEC_ID_MPEG1VIDEO:
5021                     if(s->mb_skip_run) is_gob_start=0;
5022                     break;
5023                 }
5024
5025                 if(is_gob_start){
5026                     if(s->start_mb_y != mb_y || mb_x!=0){
5027                         write_slice_end(s);
5028
5029                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5030                             ff_mpeg4_init_partitions(s);
5031                         }
5032                     }
5033
5034                     assert((put_bits_count(&s->pb)&7) == 0);
5035                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5036
5037                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5038                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5039                         int d= 100 / s->avctx->error_rate;
5040                         if(r % d == 0){
5041                             current_packet_size=0;
5042 #ifndef ALT_BITSTREAM_WRITER
5043                             s->pb.buf_ptr= s->ptr_lastgob;
5044 #endif
5045                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5046                         }
5047                     }
5048
5049                     if (s->avctx->rtp_callback){
5050                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5051                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5052                     }
5053
5054                     switch(s->codec_id){
5055                     case CODEC_ID_MPEG4:
5056                         ff_mpeg4_encode_video_packet_header(s);
5057                         ff_mpeg4_clean_buffers(s);
5058                     break;
5059                     case CODEC_ID_MPEG1VIDEO:
5060                     case CODEC_ID_MPEG2VIDEO:
5061                         ff_mpeg1_encode_slice_header(s);
5062                         ff_mpeg1_clean_buffers(s);
5063                     break;
5064                     case CODEC_ID_H263:
5065                     case CODEC_ID_H263P:
5066                         h263_encode_gob_header(s, mb_y);
5067                     break;
5068                     }
5069
5070                     if(s->flags&CODEC_FLAG_PASS1){
5071                         int bits= put_bits_count(&s->pb);
5072                         s->misc_bits+= bits - s->last_bits;
5073                         s->last_bits= bits;
5074                     }
5075
5076                     s->ptr_lastgob += current_packet_size;
5077                     s->first_slice_line=1;
5078                     s->resync_mb_x=mb_x;
5079                     s->resync_mb_y=mb_y;
5080                 }
5081             }
5082
5083             if(  (s->resync_mb_x   == s->mb_x)
5084                && s->resync_mb_y+1 == s->mb_y){
5085                 s->first_slice_line=0;
5086             }
5087
5088             s->mb_skipped=0;
5089             s->dquant=0; //only for QP_RD
5090
5091             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5092                 int next_block=0;
5093                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5094
5095                 copy_context_before_encode(&backup_s, s, -1);
5096                 backup_s.pb= s->pb;
5097                 best_s.data_partitioning= s->data_partitioning;
5098                 best_s.partitioned_frame= s->partitioned_frame;
5099                 if(s->data_partitioning){
5100                     backup_s.pb2= s->pb2;
5101                     backup_s.tex_pb= s->tex_pb;
5102                 }
5103
5104                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5105                     s->mv_dir = MV_DIR_FORWARD;
5106                     s->mv_type = MV_TYPE_16X16;
5107                     s->mb_intra= 0;
5108                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5109                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5110                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5111                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5112                 }
5113                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5114                     s->mv_dir = MV_DIR_FORWARD;
5115                     s->mv_type = MV_TYPE_FIELD;
5116                     s->mb_intra= 0;
5117                     for(i=0; i<2; i++){
5118                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5119                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5120                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5121                     }
5122                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5123                                  &dmin, &next_block, 0, 0);
5124                 }
5125                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5126                     s->mv_dir = MV_DIR_FORWARD;
5127                     s->mv_type = MV_TYPE_16X16;
5128                     s->mb_intra= 0;
5129                     s->mv[0][0][0] = 0;
5130                     s->mv[0][0][1] = 0;
5131                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5132                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5133                 }
5134                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5135                     s->mv_dir = MV_DIR_FORWARD;
5136                     s->mv_type = MV_TYPE_8X8;
5137                     s->mb_intra= 0;
5138                     for(i=0; i<4; i++){
5139                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5140                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5141                     }
5142                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5143                                  &dmin, &next_block, 0, 0);
5144                 }
5145                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5146                     s->mv_dir = MV_DIR_FORWARD;
5147                     s->mv_type = MV_TYPE_16X16;
5148                     s->mb_intra= 0;
5149                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5150                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5151                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5152                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5153                 }
5154                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5155                     s->mv_dir = MV_DIR_BACKWARD;
5156                     s->mv_type = MV_TYPE_16X16;
5157                     s->mb_intra= 0;
5158                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5159                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5161                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5162                 }
5163                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5164                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5165                     s->mv_type = MV_TYPE_16X16;
5166                     s->mb_intra= 0;
5167                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5168                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5169                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5170                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5171                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5172                                  &dmin, &next_block, 0, 0);
5173                 }
5174                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5175                     s->mv_dir = MV_DIR_FORWARD;
5176                     s->mv_type = MV_TYPE_FIELD;
5177                     s->mb_intra= 0;
5178                     for(i=0; i<2; i++){
5179                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5180                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5181                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5182                     }
5183                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5184                                  &dmin, &next_block, 0, 0);
5185                 }
5186                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5187                     s->mv_dir = MV_DIR_BACKWARD;
5188                     s->mv_type = MV_TYPE_FIELD;
5189                     s->mb_intra= 0;
5190                     for(i=0; i<2; i++){
5191                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5192                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5193                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5194                     }
5195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5196                                  &dmin, &next_block, 0, 0);
5197                 }
5198                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5199                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5200                     s->mv_type = MV_TYPE_FIELD;
5201                     s->mb_intra= 0;
5202                     for(dir=0; dir<2; dir++){
5203                         for(i=0; i<2; i++){
5204                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5205                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5206                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5207                         }
5208                     }
5209                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5210                                  &dmin, &next_block, 0, 0);
5211                 }
5212                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5213                     s->mv_dir = 0;
5214                     s->mv_type = MV_TYPE_16X16;
5215                     s->mb_intra= 1;
5216                     s->mv[0][0][0] = 0;
5217                     s->mv[0][0][1] = 0;
5218                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5219                                  &dmin, &next_block, 0, 0);
5220                     if(s->h263_pred || s->h263_aic){
5221                         if(best_s.mb_intra)
5222                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5223                         else
5224                             ff_clean_intra_table_entries(s); //old mode?
5225                     }
5226                 }
5227
5228                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5229                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5230                         const int last_qp= backup_s.qscale;
5231                         int qpi, qp, dc[6];
5232                         DCTELEM ac[6][16];
5233                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5234                         static const int dquant_tab[4]={-1,1,-2,2};
5235
5236                         assert(backup_s.dquant == 0);
5237
5238                         //FIXME intra
5239                         s->mv_dir= best_s.mv_dir;
5240                         s->mv_type = MV_TYPE_16X16;
5241                         s->mb_intra= best_s.mb_intra;
5242                         s->mv[0][0][0] = best_s.mv[0][0][0];
5243                         s->mv[0][0][1] = best_s.mv[0][0][1];
5244                         s->mv[1][0][0] = best_s.mv[1][0][0];
5245                         s->mv[1][0][1] = best_s.mv[1][0][1];
5246
5247                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5248                         for(; qpi<4; qpi++){
5249                             int dquant= dquant_tab[qpi];
5250                             qp= last_qp + dquant;
5251                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5252                                 continue;
5253                             backup_s.dquant= dquant;
5254                             if(s->mb_intra && s->dc_val[0]){
5255                                 for(i=0; i<6; i++){
5256                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5257                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5258                                 }
5259                             }
5260
5261                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5262                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5263                             if(best_s.qscale != qp){
5264                                 if(s->mb_intra && s->dc_val[0]){
5265                                     for(i=0; i<6; i++){
5266                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5267                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5268                                     }
5269                                 }
5270                             }
5271                         }
5272                     }
5273                 }
5274                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5275                     int mx= s->b_direct_mv_table[xy][0];
5276                     int my= s->b_direct_mv_table[xy][1];
5277
5278                     backup_s.dquant = 0;
5279                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5280                     s->mb_intra= 0;
5281                     ff_mpeg4_set_direct_mv(s, mx, my);
5282                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5283                                  &dmin, &next_block, mx, my);
5284                 }
5285                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5286                     backup_s.dquant = 0;
5287                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5288                     s->mb_intra= 0;
5289                     ff_mpeg4_set_direct_mv(s, 0, 0);
5290                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5291                                  &dmin, &next_block, 0, 0);
5292                 }
5293                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5294                     int coded=0;
5295                     for(i=0; i<6; i++)
5296                         coded |= s->block_last_index[i];
5297                     if(coded){
5298                         int mx,my;
5299                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5300                         if(best_s.mv_dir & MV_DIRECT){
5301                             mx=my=0; //FIXME find the one we actually used
5302                             ff_mpeg4_set_direct_mv(s, mx, my);
5303                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5304                             mx= s->mv[1][0][0];
5305                             my= s->mv[1][0][1];
5306                         }else{
5307                             mx= s->mv[0][0][0];
5308                             my= s->mv[0][0][1];
5309                         }
5310
5311                         s->mv_dir= best_s.mv_dir;
5312                         s->mv_type = best_s.mv_type;
5313                         s->mb_intra= 0;
5314 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5315                         s->mv[0][0][1] = best_s.mv[0][0][1];
5316                         s->mv[1][0][0] = best_s.mv[1][0][0];
5317                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5318                         backup_s.dquant= 0;
5319                         s->skipdct=1;
5320                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5321                                         &dmin, &next_block, mx, my);
5322                         s->skipdct=0;
5323                     }
5324                 }
5325
5326                 s->current_picture.qscale_table[xy]= best_s.qscale;
5327
5328                 copy_context_after_encode(s, &best_s, -1);
5329
5330                 pb_bits_count= put_bits_count(&s->pb);
5331                 flush_put_bits(&s->pb);
5332                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5333                 s->pb= backup_s.pb;
5334
5335                 if(s->data_partitioning){
5336                     pb2_bits_count= put_bits_count(&s->pb2);
5337                     flush_put_bits(&s->pb2);
5338                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5339                     s->pb2= backup_s.pb2;
5340
5341                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5342                     flush_put_bits(&s->tex_pb);
5343                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5344                     s->tex_pb= backup_s.tex_pb;
5345                 }
5346                 s->last_bits= put_bits_count(&s->pb);
5347
5348                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5349                     ff_h263_update_motion_val(s);
5350
5351                 if(next_block==0){ //FIXME 16 vs linesize16
5352                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5353                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5354                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5355                 }
5356
5357                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5358                     MPV_decode_mb(s, s->block);
5359             } else {
5360                 int motion_x = 0, motion_y = 0;
5361                 s->mv_type=MV_TYPE_16X16;
5362                 // only one MB-Type possible
5363
5364                 switch(mb_type){
5365                 case CANDIDATE_MB_TYPE_INTRA:
5366                     s->mv_dir = 0;
5367                     s->mb_intra= 1;
5368                     motion_x= s->mv[0][0][0] = 0;
5369                     motion_y= s->mv[0][0][1] = 0;
5370                     break;
5371                 case CANDIDATE_MB_TYPE_INTER:
5372                     s->mv_dir = MV_DIR_FORWARD;
5373                     s->mb_intra= 0;
5374                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5375                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5376                     break;
5377                 case CANDIDATE_MB_TYPE_INTER_I:
5378                     s->mv_dir = MV_DIR_FORWARD;
5379                     s->mv_type = MV_TYPE_FIELD;
5380                     s->mb_intra= 0;
5381                     for(i=0; i<2; i++){
5382                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5383                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5384                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5385                     }
5386                     break;
5387                 case CANDIDATE_MB_TYPE_INTER4V:
5388                     s->mv_dir = MV_DIR_FORWARD;
5389                     s->mv_type = MV_TYPE_8X8;
5390                     s->mb_intra= 0;
5391                     for(i=0; i<4; i++){
5392                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5393                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5394                     }
5395                     break;
5396                 case CANDIDATE_MB_TYPE_DIRECT:
5397                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5398                     s->mb_intra= 0;
5399                     motion_x=s->b_direct_mv_table[xy][0];
5400                     motion_y=s->b_direct_mv_table[xy][1];
5401                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5402                     break;
5403                 case CANDIDATE_MB_TYPE_DIRECT0:
5404                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5405                     s->mb_intra= 0;
5406                     ff_mpeg4_set_direct_mv(s, 0, 0);
5407                     break;
5408                 case CANDIDATE_MB_TYPE_BIDIR:
5409                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5410                     s->mb_intra= 0;
5411                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5412                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5413                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5414                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5415                     break;
5416                 case CANDIDATE_MB_TYPE_BACKWARD:
5417                     s->mv_dir = MV_DIR_BACKWARD;
5418                     s->mb_intra= 0;
5419                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5420                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5421                     break;
5422                 case CANDIDATE_MB_TYPE_FORWARD:
5423                     s->mv_dir = MV_DIR_FORWARD;
5424                     s->mb_intra= 0;
5425                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5426                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5427 //                    printf(" %d %d ", motion_x, motion_y);
5428                     break;
5429                 case CANDIDATE_MB_TYPE_FORWARD_I:
5430                     s->mv_dir = MV_DIR_FORWARD;
5431                     s->mv_type = MV_TYPE_FIELD;
5432                     s->mb_intra= 0;
5433                     for(i=0; i<2; i++){
5434                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5435                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5436                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5437                     }
5438                     break;
5439                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5440                     s->mv_dir = MV_DIR_BACKWARD;
5441                     s->mv_type = MV_TYPE_FIELD;
5442                     s->mb_intra= 0;
5443                     for(i=0; i<2; i++){
5444                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5445                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5446                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5447                     }
5448                     break;
5449                 case CANDIDATE_MB_TYPE_BIDIR_I:
5450                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5451                     s->mv_type = MV_TYPE_FIELD;
5452                     s->mb_intra= 0;
5453                     for(dir=0; dir<2; dir++){
5454                         for(i=0; i<2; i++){
5455                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5456                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5457                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5458                         }
5459                     }
5460                     break;
5461                 default:
5462                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5463                 }
5464
5465                 encode_mb(s, motion_x, motion_y);
5466
5467                 // RAL: Update last macroblock type
5468                 s->last_mv_dir = s->mv_dir;
5469
5470                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5471                     ff_h263_update_motion_val(s);
5472
5473                 MPV_decode_mb(s, s->block);
5474             }
5475
5476             /* clean the MV table in I/P/S frames; it is needed for direct mode in B-frames */
5477             if(s->mb_intra /* && I,P,S_TYPE */){
5478                 s->p_mv_table[xy][0]=0;
5479                 s->p_mv_table[xy][1]=0;
5480             }
5481
5482             if(s->flags&CODEC_FLAG_PSNR){
5483                 int w= 16;
5484                 int h= 16;
5485
5486                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5487                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5488
5489                 s->current_picture.error[0] += sse(
5490                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5491                     s->dest[0], w, h, s->linesize);
5492                 s->current_picture.error[1] += sse(
5493                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5494                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5495                 s->current_picture.error[2] += sse(
5496                     s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5497                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5498             }
5499             if(s->loop_filter){
5500                 if(s->out_format == FMT_H263)
5501                     ff_h263_loop_filter(s);
5502             }
5503 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5504         }
5505     }
5506
5507     //not beautiful here, but it must be written before flushing, so it has to be here
5508     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5509         msmpeg4_encode_ext_header(s);
5510
5511     write_slice_end(s);
5512
5513     /* Send the last GOB if RTP */
5514     if (s->avctx->rtp_callback) {
5515         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5516         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5517         /* Call the RTP callback to send the last GOB */
5518         emms_c();
5519         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5520     }
5521
5522     return 0;
5523 }
5524
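/**
 * Merge a per-thread statistic into the main context: MERGE() adds src->field
 * to dst->field and clears the source so the value is not counted twice.
 * merge_context_after_me() collects the motion-estimation sums of a slice thread.
 */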
5525 #define MERGE(field) dst->field += src->field; src->field=0
5526 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5527     MERGE(me.scene_change_score);
5528     MERGE(me.mc_mb_var_sum_temp);
5529     MERGE(me.mb_var_sum_temp);
5530 }
5531
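/**
 * Merge the encoding statistics of a slice thread into the main context and
 * append its (byte-aligned) bitstream to the main PutBitContext.
 */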
5532 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5533     int i;
5534
5535     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5536     MERGE(dct_count[1]);
5537     MERGE(mv_bits);
5538     MERGE(i_tex_bits);
5539     MERGE(p_tex_bits);
5540     MERGE(i_count);
5541     MERGE(f_count);
5542     MERGE(b_count);
5543     MERGE(skip_count);
5544     MERGE(misc_bits);
5545     MERGE(error_count);
5546     MERGE(padding_bug_score);
5547     MERGE(current_picture.error[0]);
5548     MERGE(current_picture.error[1]);
5549     MERGE(current_picture.error[2]);
5550
5551     if(dst->avctx->noise_reduction){
5552         for(i=0; i<64; i++){
5553             MERGE(dct_error_sum[0][i]);
5554             MERGE(dct_error_sum[1][i]);
5555         }
5556     }
5557
5558     assert(put_bits_count(&src->pb) % 8 ==0);
5559     assert(put_bits_count(&dst->pb) % 8 ==0);
5560     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5561     flush_put_bits(&dst->pb);
5562 }
5563
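/**
 * Pick the quality for the current picture: use a pending next_lambda if set,
 * otherwise ask the rate control unless the qscale is fixed. For adaptive
 * quantization the per-MB qscale table is cleaned up afterwards.
 * @return 0 on success, -1 if the rate control fails
 */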
5564 static int estimate_qp(MpegEncContext *s, int dry_run){
5565     if (s->next_lambda){
5566         s->current_picture_ptr->quality=
5567         s->current_picture.quality = s->next_lambda;
5568         if(!dry_run) s->next_lambda= 0;
5569     } else if (!s->fixed_qscale) {
5570         s->current_picture_ptr->quality=
5571         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5572         if (s->current_picture.quality < 0)
5573             return -1;
5574     }
5575
5576     if(s->adaptive_quant){
5577         switch(s->codec_id){
5578         case CODEC_ID_MPEG4:
5579             ff_clean_mpeg4_qscales(s);
5580             break;
5581         case CODEC_ID_H263:
5582         case CODEC_ID_H263P:
5583         case CODEC_ID_FLV1:
5584             ff_clean_h263_qscales(s);
5585             break;
5586         }
5587
5588         s->lambda= s->lambda_table[0];
5589         //FIXME broken
5590     }else
5591         s->lambda= s->current_picture.quality;
5592 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5593     update_qscale(s);
5594     return 0;
5595 }
5596
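/**
 * Encode one picture: run motion estimation on all slice threads, switch to an
 * intra frame if a scene change is detected, choose f_code/b_code and clamp
 * overly long motion vectors, estimate the quantizer, write the format-specific
 * picture header, then run encode_thread() on every slice context and merge
 * the results.
 */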
5597 static int encode_picture(MpegEncContext *s, int picture_number)
5598 {
5599     int i;
5600     int bits;
5601
5602     s->picture_number = picture_number;
5603
5604     /* Reset the average MB variance */
5605     s->me.mb_var_sum_temp    =
5606     s->me.mc_mb_var_sum_temp = 0;
5607
5608     /* we need to initialize some time vars before we can encode b-frames */
5609     // RAL: Condition added for MPEG1VIDEO
5610     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5611         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5612
5613     s->me.scene_change_score=0;
5614
5615 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5616
5617     if(s->pict_type==I_TYPE){
5618         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5619         else                        s->no_rounding=0;
5620     }else if(s->pict_type!=B_TYPE){
5621         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5622             s->no_rounding ^= 1;
5623     }
5624
5625     if(s->flags & CODEC_FLAG_PASS2){
5626         if (estimate_qp(s,1) < 0)
5627             return -1;
5628         ff_get_2pass_fcode(s);
5629     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5630         if(s->pict_type==B_TYPE)
5631             s->lambda= s->last_lambda_for[s->pict_type];
5632         else
5633             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5634         update_qscale(s);
5635     }
5636
5637     s->mb_intra=0; //for the rate distortion & bit compare functions
5638     for(i=1; i<s->avctx->thread_count; i++){
5639         ff_update_duplicate_context(s->thread_context[i], s);
5640     }
5641
5642     ff_init_me(s);
5643
5644     /* Estimate motion for every MB */
5645     if(s->pict_type != I_TYPE){
5646         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5647         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
5648         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5649             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5650                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5651             }
5652         }
5653
5654         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5655     }else /* if(s->pict_type == I_TYPE) */{
5656         /* I-Frame */
5657         for(i=0; i<s->mb_stride*s->mb_height; i++)
5658             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5659
5660         if(!s->fixed_qscale){
5661             /* finding spatial complexity for I-frame rate control */
5662             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5663         }
5664     }
5665     for(i=1; i<s->avctx->thread_count; i++){
5666         merge_context_after_me(s, s->thread_context[i]);
5667     }
5668     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5669     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5670     emms_c();
5671
5672     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5673         s->pict_type= I_TYPE;
5674         for(i=0; i<s->mb_stride*s->mb_height; i++)
5675             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5676 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5677     }
5678
5679     if(!s->umvplus){
5680         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5681             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5682
5683             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5684                 int a,b;
5685                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5686                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5687                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5688             }
5689
5690             ff_fix_long_p_mvs(s);
5691             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5692             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5693                 int j;
5694                 for(i=0; i<2; i++){
5695                     for(j=0; j<2; j++)
5696                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5697                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5698                 }
5699             }
5700         }
5701
5702         if(s->pict_type==B_TYPE){
5703             int a, b;
5704
5705             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5706             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5707             s->f_code = FFMAX(a, b);
5708
5709             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5710             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5711             s->b_code = FFMAX(a, b);
5712
5713             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5714             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5715             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5716             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5717             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5718                 int dir, j;
5719                 for(dir=0; dir<2; dir++){
5720                     for(i=0; i<2; i++){
5721                         for(j=0; j<2; j++){
5722                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5723                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5724                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5725                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5726                         }
5727                     }
5728                 }
5729             }
5730         }
5731     }
5732
5733     if (estimate_qp(s, 0) < 0)
5734         return -1;
5735
5736     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5737         s->qscale= 3; //reduce clipping problems
5738
5739     if (s->out_format == FMT_MJPEG) {
5740         /* for mjpeg, we do include qscale in the matrix */
5741         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5742         for(i=1;i<64;i++){
5743             int j= s->dsp.idct_permutation[i];
5744
5745             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5746         }
5747         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5748                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5749         s->qscale= 8;
5750     }
5751
5752     //FIXME var duplication
5753     s->current_picture_ptr->key_frame=
5754     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5755     s->current_picture_ptr->pict_type=
5756     s->current_picture.pict_type= s->pict_type;
5757
5758     if(s->current_picture.key_frame)
5759         s->picture_in_gop_number=0;
5760
5761     s->last_bits= put_bits_count(&s->pb);
5762     switch(s->out_format) {
5763     case FMT_MJPEG:
5764         mjpeg_picture_header(s);
5765         break;
5766 #ifdef CONFIG_H261_ENCODER
5767     case FMT_H261:
5768         ff_h261_encode_picture_header(s, picture_number);
5769         break;
5770 #endif
5771     case FMT_H263:
5772         if (s->codec_id == CODEC_ID_WMV2)
5773             ff_wmv2_encode_picture_header(s, picture_number);
5774         else if (s->h263_msmpeg4)
5775             msmpeg4_encode_picture_header(s, picture_number);
5776         else if (s->h263_pred)
5777             mpeg4_encode_picture_header(s, picture_number);
5778 #ifdef CONFIG_RV10_ENCODER
5779         else if (s->codec_id == CODEC_ID_RV10)
5780             rv10_encode_picture_header(s, picture_number);
5781 #endif
5782 #ifdef CONFIG_RV20_ENCODER
5783         else if (s->codec_id == CODEC_ID_RV20)
5784             rv20_encode_picture_header(s, picture_number);
5785 #endif
5786         else if (s->codec_id == CODEC_ID_FLV1)
5787             ff_flv_encode_picture_header(s, picture_number);
5788         else
5789             h263_encode_picture_header(s, picture_number);
5790         break;
5791     case FMT_MPEG1:
5792         mpeg1_encode_picture_header(s, picture_number);
5793         break;
5794     case FMT_H264:
5795         break;
5796     default:
5797         assert(0);
5798     }
5799     bits= put_bits_count(&s->pb);
5800     s->header_bits= bits - s->last_bits;
5801
5802     for(i=1; i<s->avctx->thread_count; i++){
5803         update_duplicate_context_after_me(s->thread_context[i], s);
5804     }
5805     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5806     for(i=1; i<s->avctx->thread_count; i++){
5807         merge_context_after_encode(s, s->thread_context[i]);
5808     }
5809     emms_c();
5810     return 0;
5811 }
5812
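/**
 * DCT-domain noise reduction: accumulate the magnitude of each coefficient in
 * dct_error_sum and pull the coefficient towards zero by the per-coefficient
 * offset, clamping at zero so the sign never flips.
 */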
5813 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5814     const int intra= s->mb_intra;
5815     int i;
5816
5817     s->dct_count[intra]++;
5818
5819     for(i=0; i<64; i++){
5820         int level= block[i];
5821
5822         if(level){
5823             if(level>0){
5824                 s->dct_error_sum[intra][i] += level;
5825                 level -= s->dct_offset[intra][i];
5826                 if(level<0) level=0;
5827             }else{
5828                 s->dct_error_sum[intra][i] -= level;
5829                 level += s->dct_offset[intra][i];
5830                 if(level>0) level=0;
5831             }
5832             block[i]= level;
5833         }
5834     }
5835 }
5836
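/**
 * Rate-distortion optimized quantization: keep up to two candidate levels per
 * coefficient and run a trellis (Viterbi-style) search over (run, level) pairs
 * that minimizes distortion + lambda * bits.
 * @return the index of the last non-zero coefficient
 */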
5837 static int dct_quantize_trellis_c(MpegEncContext *s,
5838                         DCTELEM *block, int n,
5839                         int qscale, int *overflow){
5840     const int *qmat;
5841     const uint8_t *scantable= s->intra_scantable.scantable;
5842     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5843     int max=0;
5844     unsigned int threshold1, threshold2;
5845     int bias=0;
5846     int run_tab[65];
5847     int level_tab[65];
5848     int score_tab[65];
5849     int survivor[65];
5850     int survivor_count;
5851     int last_run=0;
5852     int last_level=0;
5853     int last_score= 0;
5854     int last_i;
5855     int coeff[2][64];
5856     int coeff_count[64];
5857     int qmul, qadd, start_i, last_non_zero, i, dc;
5858     const int esc_length= s->ac_esc_length;
5859     uint8_t * length;
5860     uint8_t * last_length;
5861     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5862
5863     s->dsp.fdct (block);
5864
5865     if(s->dct_error_sum)
5866         s->denoise_dct(s, block);
5867     qmul= qscale*16;
5868     qadd= ((qscale-1)|1)*8;
5869
5870     if (s->mb_intra) {
5871         int q;
5872         if (!s->h263_aic) {
5873             if (n < 4)
5874                 q = s->y_dc_scale;
5875             else
5876                 q = s->c_dc_scale;
5877             q = q << 3;
5878         } else{
5879             /* For AIC we skip quant/dequant of INTRADC */
5880             q = 1 << 3;
5881             qadd=0;
5882         }
5883
5884         /* note: block[0] is assumed to be positive */
5885         block[0] = (block[0] + (q >> 1)) / q;
5886         start_i = 1;
5887         last_non_zero = 0;
5888         qmat = s->q_intra_matrix[qscale];
5889         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5890             bias= 1<<(QMAT_SHIFT-1);
5891         length     = s->intra_ac_vlc_length;
5892         last_length= s->intra_ac_vlc_last_length;
5893     } else {
5894         start_i = 0;
5895         last_non_zero = -1;
5896         qmat = s->q_inter_matrix[qscale];
5897         length     = s->inter_ac_vlc_length;
5898         last_length= s->inter_ac_vlc_last_length;
5899     }
5900     last_i= start_i;
5901
5902     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5903     threshold2= (threshold1<<1);
5904
5905     for(i=63; i>=start_i; i--) {
5906         const int j = scantable[i];
5907         int level = block[j] * qmat[j];
5908
5909         if(((unsigned)(level+threshold1))>threshold2){
5910             last_non_zero = i;
5911             break;
5912         }
5913     }
5914
5915     for(i=start_i; i<=last_non_zero; i++) {
5916         const int j = scantable[i];
5917         int level = block[j] * qmat[j];
5918
5919 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5920 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5921         if(((unsigned)(level+threshold1))>threshold2){
5922             if(level>0){
5923                 level= (bias + level)>>QMAT_SHIFT;
5924                 coeff[0][i]= level;
5925                 coeff[1][i]= level-1;
5926 //                coeff[2][k]= level-2;
5927             }else{
5928                 level= (bias - level)>>QMAT_SHIFT;
5929                 coeff[0][i]= -level;
5930                 coeff[1][i]= -level+1;
5931 //                coeff[2][k]= -level+2;
5932             }
5933             coeff_count[i]= FFMIN(level, 2);
5934             assert(coeff_count[i]);
5935             max |=level;
5936         }else{
5937             coeff[0][i]= (level>>31)|1;
5938             coeff_count[i]= 1;
5939         }
5940     }
5941
5942     *overflow= s->max_qcoeff < max; //overflow might have happened
5943
5944     if(last_non_zero < start_i){
5945         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5946         return last_non_zero;
5947     }
5948
5949     score_tab[start_i]= 0;
5950     survivor[0]= start_i;
5951     survivor_count= 1;
5952
5953     for(i=start_i; i<=last_non_zero; i++){
5954         int level_index, j;
5955         const int dct_coeff= FFABS(block[ scantable[i] ]);
5956         const int zero_distortion= dct_coeff*dct_coeff;
5957         int best_score=256*256*256*120;
5958         for(level_index=0; level_index < coeff_count[i]; level_index++){
5959             int distortion;
5960             int level= coeff[level_index][i];
5961             const int alevel= FFABS(level);
5962             int unquant_coeff;
5963
5964             assert(level);
5965
5966             if(s->out_format == FMT_H263){
5967                 unquant_coeff= alevel*qmul + qadd;
5968             }else{ //MPEG1
5969                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5970                 if(s->mb_intra){
5971                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5972                         unquant_coeff =   (unquant_coeff - 1) | 1;
5973                 }else{
5974                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5975                         unquant_coeff =   (unquant_coeff - 1) | 1;
5976                 }
5977                 unquant_coeff<<= 3;
5978             }
5979
5980             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5981             level+=64;
5982             if((level&(~127)) == 0){
5983                 for(j=survivor_count-1; j>=0; j--){
5984                     int run= i - survivor[j];
5985                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5986                     score += score_tab[i-run];
5987
5988                     if(score < best_score){
5989                         best_score= score;
5990                         run_tab[i+1]= run;
5991                         level_tab[i+1]= level-64;
5992                     }
5993                 }
5994
5995                 if(s->out_format == FMT_H263){
5996                     for(j=survivor_count-1; j>=0; j--){
5997                         int run= i - survivor[j];
5998                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5999                         score += score_tab[i-run];
6000                         if(score < last_score){
6001                             last_score= score;
6002                             last_run= run;
6003                             last_level= level-64;
6004                             last_i= i+1;
6005                         }
6006                     }
6007                 }
6008             }else{
6009                 distortion += esc_length*lambda;
6010                 for(j=survivor_count-1; j>=0; j--){
6011                     int run= i - survivor[j];
6012                     int score= distortion + score_tab[i-run];
6013
6014                     if(score < best_score){
6015                         best_score= score;
6016                         run_tab[i+1]= run;
6017                         level_tab[i+1]= level-64;
6018                     }
6019                 }
6020
6021                 if(s->out_format == FMT_H263){
6022                   for(j=survivor_count-1; j>=0; j--){
6023                         int run= i - survivor[j];
6024                         int score= distortion + score_tab[i-run];
6025                         if(score < last_score){
6026                             last_score= score;
6027                             last_run= run;
6028                             last_level= level-64;
6029                             last_i= i+1;
6030                         }
6031                     }
6032                 }
6033             }
6034         }
6035
6036         score_tab[i+1]= best_score;
6037
6038         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
6039         if(last_non_zero <= 27){
6040             for(; survivor_count; survivor_count--){
6041                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6042                     break;
6043             }
6044         }else{
6045             for(; survivor_count; survivor_count--){
6046                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6047                     break;
6048             }
6049         }
6050
6051         survivor[ survivor_count++ ]= i+1;
6052     }
6053
6054     if(s->out_format != FMT_H263){
6055         last_score= 256*256*256*120;
6056         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6057             int score= score_tab[i];
6058             if(i) score += lambda*2; //FIXME more exact?
6059
6060             if(score < last_score){
6061                 last_score= score;
6062                 last_i= i;
6063                 last_level= level_tab[i];
6064                 last_run= run_tab[i];
6065             }
6066         }
6067     }
6068
6069     s->coded_score[n] = last_score;
6070
6071     dc= FFABS(block[0]);
6072     last_non_zero= last_i - 1;
6073     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6074
6075     if(last_non_zero < start_i)
6076         return last_non_zero;
6077
6078     if(last_non_zero == 0 && start_i == 0){
6079         int best_level= 0;
6080         int best_score= dc * dc;
6081
6082         for(i=0; i<coeff_count[0]; i++){
6083             int level= coeff[i][0];
6084             int alevel= FFABS(level);
6085             int unquant_coeff, score, distortion;
6086
6087             if(s->out_format == FMT_H263){
6088                     unquant_coeff= (alevel*qmul + qadd)>>3;
6089             }else{ //MPEG1
6090                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6091                     unquant_coeff =   (unquant_coeff - 1) | 1;
6092             }
6093             unquant_coeff = (unquant_coeff + 4) >> 3;
6094             unquant_coeff<<= 3 + 3;
6095
6096             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6097             level+=64;
6098             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6099             else                    score= distortion + esc_length*lambda;
6100
6101             if(score < best_score){
6102                 best_score= score;
6103                 best_level= level - 64;
6104             }
6105         }
6106         block[0]= best_level;
6107         s->coded_score[n] = best_score - dc*dc;
6108         if(best_level == 0) return -1;
6109         else                return last_non_zero;
6110     }
6111
6112     i= last_i;
6113     assert(last_level);
6114
6115     block[ perm_scantable[last_non_zero] ]= last_level;
6116     i -= last_run + 1;
6117
6118     for(; i>start_i; i -= run_tab[i] + 1){
6119         block[ perm_scantable[i-1] ]= level_tab[i];
6120     }
6121
6122     return last_non_zero;
6123 }
6124
6125 //#define REFINE_STATS 1
6126 static int16_t basis[64][64];
6127
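/**
 * Precompute the 64 8x8 DCT basis functions, scaled by BASIS_SHIFT and with
 * the 1/sqrt(2) factor for the DC row/column, stored in IDCT permutation
 * order. Used by dct_quantize_refine().
 */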
6128 static void build_basis(uint8_t *perm){
6129     int i, j, x, y;
6130     emms_c();
6131     for(i=0; i<8; i++){
6132         for(j=0; j<8; j++){
6133             for(y=0; y<8; y++){
6134                 for(x=0; x<8; x++){
6135                     double s= 0.25*(1<<BASIS_SHIFT);
6136                     int index= 8*i + j;
6137                     int perm_index= perm[index];
6138                     if(i==0) s*= sqrt(0.5);
6139                     if(j==0) s*= sqrt(0.5);
6140                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6141                 }
6142             }
6143         }
6144     }
6145 }
6146
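/**
 * Iteratively refine an already quantized block (quantizer noise shaping):
 * keep the spatial-domain reconstruction error in rem[], try changing single
 * coefficients by +-1 and accept a change whenever it lowers the combined
 * weighted distortion + lambda * rate score, until no change helps.
 * @return the new index of the last non-zero coefficient
 */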
6147 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6148                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6149                         int n, int qscale){
6150     int16_t rem[64];
6151     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6152     const int *qmat;
6153     const uint8_t *scantable= s->intra_scantable.scantable;
6154     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6155 //    unsigned int threshold1, threshold2;
6156 //    int bias=0;
6157     int run_tab[65];
6158     int prev_run=0;
6159     int prev_level=0;
6160     int qmul, qadd, start_i, last_non_zero, i, dc;
6161     uint8_t * length;
6162     uint8_t * last_length;
6163     int lambda;
6164     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
6165 #ifdef REFINE_STATS
6166 static int count=0;
6167 static int after_last=0;
6168 static int to_zero=0;
6169 static int from_zero=0;
6170 static int raise=0;
6171 static int lower=0;
6172 static int messed_sign=0;
6173 #endif
6174
6175     if(basis[0][0] == 0)
6176         build_basis(s->dsp.idct_permutation);
6177
6178     qmul= qscale*2;
6179     qadd= (qscale-1)|1;
6180     if (s->mb_intra) {
6181         if (!s->h263_aic) {
6182             if (n < 4)
6183                 q = s->y_dc_scale;
6184             else
6185                 q = s->c_dc_scale;
6186         } else{
6187             /* For AIC we skip quant/dequant of INTRADC */
6188             q = 1;
6189             qadd=0;
6190         }
6191         q <<= RECON_SHIFT-3;
6192         /* note: block[0] is assumed to be positive */
6193         dc= block[0]*q;
6194 //        block[0] = (block[0] + (q >> 1)) / q;
6195         start_i = 1;
6196         qmat = s->q_intra_matrix[qscale];
6197 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6198 //            bias= 1<<(QMAT_SHIFT-1);
6199         length     = s->intra_ac_vlc_length;
6200         last_length= s->intra_ac_vlc_last_length;
6201     } else {
6202         dc= 0;
6203         start_i = 0;
6204         qmat = s->q_inter_matrix[qscale];
6205         length     = s->inter_ac_vlc_length;
6206         last_length= s->inter_ac_vlc_last_length;
6207     }
6208     last_non_zero = s->block_last_index[n];
6209
6210 #ifdef REFINE_STATS
6211 {START_TIMER
6212 #endif
6213     dc += (1<<(RECON_SHIFT-1));
6214     for(i=0; i<64; i++){
6215         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6216     }
6217 #ifdef REFINE_STATS
6218 STOP_TIMER("memset rem[]")}
6219 #endif
6220     sum=0;
6221     for(i=0; i<64; i++){
6222         int one= 36;
6223         int qns=4;
6224         int w;
6225
6226         w= FFABS(weight[i]) + qns*one;
6227         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6228
6229         weight[i] = w;
6230 //        w=weight[i] = (63*qns + (w/2)) / w;
6231
6232         assert(w>0);
6233         assert(w<(1<<6));
6234         sum += w*w;
6235     }
6236     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6237 #ifdef REFINE_STATS
6238 {START_TIMER
6239 #endif
6240     run=0;
6241     rle_index=0;
6242     for(i=start_i; i<=last_non_zero; i++){
6243         int j= perm_scantable[i];
6244         const int level= block[j];
6245         int coeff;
6246
6247         if(level){
6248             if(level<0) coeff= qmul*level - qadd;
6249             else        coeff= qmul*level + qadd;
6250             run_tab[rle_index++]=run;
6251             run=0;
6252
6253             s->dsp.add_8x8basis(rem, basis[j], coeff);
6254         }else{
6255             run++;
6256         }
6257     }
6258 #ifdef REFINE_STATS
6259 if(last_non_zero>0){
6260 STOP_TIMER("init rem[]")
6261 }
6262 }
6263
6264 {START_TIMER
6265 #endif
6266     for(;;){
6267         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6268         int best_coeff=0;
6269         int best_change=0;
6270         int run2, best_unquant_change=0, analyze_gradient;
6271 #ifdef REFINE_STATS
6272 {START_TIMER
6273 #endif
6274         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6275
6276         if(analyze_gradient){
6277 #ifdef REFINE_STATS
6278 {START_TIMER
6279 #endif
6280             for(i=0; i<64; i++){
6281                 int w= weight[i];
6282
6283                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6284             }
6285 #ifdef REFINE_STATS
6286 STOP_TIMER("rem*w*w")}
6287 {START_TIMER
6288 #endif
6289             s->dsp.fdct(d1);
6290 #ifdef REFINE_STATS
6291 STOP_TIMER("dct")}
6292 #endif
6293         }
6294
6295         if(start_i){
6296             const int level= block[0];
6297             int change, old_coeff;
6298
6299             assert(s->mb_intra);
6300
6301             old_coeff= q*level;
6302
6303             for(change=-1; change<=1; change+=2){
6304                 int new_level= level + change;
6305                 int score, new_coeff;
6306
6307                 new_coeff= q*new_level;
6308                 if(new_coeff >= 2048 || new_coeff < 0)
6309                     continue;
6310
6311                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6312                 if(score<best_score){
6313                     best_score= score;
6314                     best_coeff= 0;
6315                     best_change= change;
6316                     best_unquant_change= new_coeff - old_coeff;
6317                 }
6318             }
6319         }
6320
6321         run=0;
6322         rle_index=0;
6323         run2= run_tab[rle_index++];
6324         prev_level=0;
6325         prev_run=0;
6326
6327         for(i=start_i; i<64; i++){
6328             int j= perm_scantable[i];
6329             const int level= block[j];
6330             int change, old_coeff;
6331
6332             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6333                 break;
6334
6335             if(level){
6336                 if(level<0) old_coeff= qmul*level - qadd;
6337                 else        old_coeff= qmul*level + qadd;
6338                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6339             }else{
6340                 old_coeff=0;
6341                 run2--;
6342                 assert(run2>=0 || i >= last_non_zero );
6343             }
6344
6345             for(change=-1; change<=1; change+=2){
6346                 int new_level= level + change;
6347                 int score, new_coeff, unquant_change;
6348
6349                 score=0;
6350                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6351                    continue;
6352
6353                 if(new_level){
6354                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6355                     else            new_coeff= qmul*new_level + qadd;
6356                     if(new_coeff >= 2048 || new_coeff <= -2048)
6357                         continue;
6358                     //FIXME check for overflow
6359
6360                     if(level){
6361                         if(level < 63 && level > -63){
6362                             if(i < last_non_zero)
6363                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6364                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6365                             else
6366                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6367                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6368                         }
6369                     }else{
6370                         assert(FFABS(new_level)==1);
6371
6372                         if(analyze_gradient){
6373                             int g= d1[ scantable[i] ];
6374                             if(g && (g^new_level) >= 0)
6375                                 continue;
6376                         }
6377
6378                         if(i < last_non_zero){
6379                             int next_i= i + run2 + 1;
6380                             int next_level= block[ perm_scantable[next_i] ] + 64;
6381
6382                             if(next_level&(~127))
6383                                 next_level= 0;
6384
6385                             if(next_i < last_non_zero)
6386                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6387                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6388                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6389                             else
6390                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6391                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6392                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6393                         }else{
6394                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6395                             if(prev_level){
6396                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6397                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6398                             }
6399                         }
6400                     }
6401                 }else{
6402                     new_coeff=0;
6403                     assert(FFABS(level)==1);
6404
6405                     if(i < last_non_zero){
6406                         int next_i= i + run2 + 1;
6407                         int next_level= block[ perm_scantable[next_i] ] + 64;
6408
6409                         if(next_level&(~127))
6410                             next_level= 0;
6411
6412                         if(next_i < last_non_zero)
6413                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6414                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6415                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6416                         else
6417                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6418                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6419                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6420                     }else{
6421                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6422                         if(prev_level){
6423                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6424                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6425                         }
6426                     }
6427                 }
6428
6429                 score *= lambda;
6430
6431                 unquant_change= new_coeff - old_coeff;
6432                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6433
6434                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6435                 if(score<best_score){
6436                     best_score= score;
6437                     best_coeff= i;
6438                     best_change= change;
6439                     best_unquant_change= unquant_change;
6440                 }
6441             }
6442             if(level){
6443                 prev_level= level + 64;
6444                 if(prev_level&(~127))
6445                     prev_level= 0;
6446                 prev_run= run;
6447                 run=0;
6448             }else{
6449                 run++;
6450             }
6451         }
6452 #ifdef REFINE_STATS
6453 STOP_TIMER("iterative step")}
6454 #endif
6455
6456         if(best_change){
6457             int j= perm_scantable[ best_coeff ];
6458
6459             block[j] += best_change;
6460
6461             if(best_coeff > last_non_zero){
6462                 last_non_zero= best_coeff;
6463                 assert(block[j]);
6464 #ifdef REFINE_STATS
6465 after_last++;
6466 #endif
6467             }else{
6468 #ifdef REFINE_STATS
6469 if(block[j]){
6470     if(block[j] - best_change){
6471         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6472             raise++;
6473         }else{
6474             lower++;
6475         }
6476     }else{
6477         from_zero++;
6478     }
6479 }else{
6480     to_zero++;
6481 }
6482 #endif
6483                 for(; last_non_zero>=start_i; last_non_zero--){
6484                     if(block[perm_scantable[last_non_zero]])
6485                         break;
6486                 }
6487             }
6488 #ifdef REFINE_STATS
6489 count++;
6490 if(256*256*256*64 % count == 0){
6491     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6492 }
6493 #endif
6494             run=0;
6495             rle_index=0;
6496             for(i=start_i; i<=last_non_zero; i++){
6497                 int j= perm_scantable[i];
6498                 const int level= block[j];
6499
6500                  if(level){
6501                      run_tab[rle_index++]=run;
6502                      run=0;
6503                  }else{
6504                      run++;
6505                  }
6506             }
6507
6508             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6509         }else{
6510             break;
6511         }
6512     }
6513 #ifdef REFINE_STATS
6514 if(last_non_zero>0){
6515 STOP_TIMER("iterative search")
6516 }
6517 }
6518 #endif
6519
6520     return last_non_zero;
6521 }
6522
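/**
 * Default quantizer: forward DCT, optional denoising, then quantize every
 * coefficient with the format-specific bias and zero everything below the
 * threshold; the surviving coefficients are permuted to match the IDCT.
 * @return the index of the last non-zero coefficient
 */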
6523 static int dct_quantize_c(MpegEncContext *s,
6524                         DCTELEM *block, int n,
6525                         int qscale, int *overflow)
6526 {
6527     int i, j, level, last_non_zero, q, start_i;
6528     const int *qmat;
6529     const uint8_t *scantable= s->intra_scantable.scantable;
6530     int bias;
6531     int max=0;
6532     unsigned int threshold1, threshold2;
6533
6534     s->dsp.fdct (block);
6535
6536     if(s->dct_error_sum)
6537         s->denoise_dct(s, block);
6538
6539     if (s->mb_intra) {
6540         if (!s->h263_aic) {
6541             if (n < 4)
6542                 q = s->y_dc_scale;
6543             else
6544                 q = s->c_dc_scale;
6545             q = q << 3;
6546         } else
6547             /* For AIC we skip quant/dequant of INTRADC */
6548             q = 1 << 3;
6549
6550         /* note: block[0] is assumed to be positive */
6551         block[0] = (block[0] + (q >> 1)) / q;
6552         start_i = 1;
6553         last_non_zero = 0;
6554         qmat = s->q_intra_matrix[qscale];
6555         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6556     } else {
6557         start_i = 0;
6558         last_non_zero = -1;
6559         qmat = s->q_inter_matrix[qscale];
6560         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6561     }
6562     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6563     threshold2= (threshold1<<1);
6564     for(i=63;i>=start_i;i--) {
6565         j = scantable[i];
6566         level = block[j] * qmat[j];
6567
6568         if(((unsigned)(level+threshold1))>threshold2){
6569             last_non_zero = i;
6570             break;
6571         }else{
6572             block[j]=0;
6573         }
6574     }
6575     for(i=start_i; i<=last_non_zero; i++) {
6576         j = scantable[i];
6577         level = block[j] * qmat[j];
6578
6579 //        if(   bias+level >= (1<<QMAT_SHIFT)
6580 //           || bias-level >= (1<<QMAT_SHIFT)){
6581         if(((unsigned)(level+threshold1))>threshold2){
6582             if(level>0){
6583                 level= (bias + level)>>QMAT_SHIFT;
6584                 block[j]= level;
6585             }else{
6586                 level= (bias - level)>>QMAT_SHIFT;
6587                 block[j]= -level;
6588             }
6589             max |=level;
6590         }else{
6591             block[j]=0;
6592         }
6593     }
6594     *overflow= s->max_qcoeff < max; //overflow might have happened
6595
6596     /* we need this permutation so that we correct the IDCT; we only permute the nonzero elements */
6597     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6598         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6599
6600     return last_non_zero;
6601 }
6602
6603 #endif //CONFIG_ENCODERS
6604
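/**
 * MPEG-1 intra dequantization: the DC coefficient is scaled by the DC scaler,
 * the AC coefficients by qscale * quant_matrix >> 3, with the result forced
 * to be odd (oddification).
 */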
6605 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6606                                    DCTELEM *block, int n, int qscale)
6607 {
6608     int i, level, nCoeffs;
6609     const uint16_t *quant_matrix;
6610
6611     nCoeffs= s->block_last_index[n];
6612
6613     if (n < 4)
6614         block[0] = block[0] * s->y_dc_scale;
6615     else
6616         block[0] = block[0] * s->c_dc_scale;
6617     /* XXX: only mpeg1 */
6618     quant_matrix = s->intra_matrix;
6619     for(i=1;i<=nCoeffs;i++) {
6620         int j= s->intra_scantable.permutated[i];
6621         level = block[j];
6622         if (level) {
6623             if (level < 0) {
6624                 level = -level;
6625                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6626                 level = (level - 1) | 1;
6627                 level = -level;
6628             } else {
6629                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6630                 level = (level - 1) | 1;
6631             }
6632             block[j] = level;
6633         }
6634     }
6635 }
6636
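/**
 * MPEG-1 inter dequantization: reconstruct each coefficient as
 * ((2*level + 1) * qscale * quant_matrix) >> 4, forced to be odd.
 */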
6637 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6638                                    DCTELEM *block, int n, int qscale)
6639 {
6640     int i, level, nCoeffs;
6641     const uint16_t *quant_matrix;
6642
6643     nCoeffs= s->block_last_index[n];
6644
6645     quant_matrix = s->inter_matrix;
6646     for(i=0; i<=nCoeffs; i++) {
6647         int j= s->intra_scantable.permutated[i];
6648         level = block[j];
6649         if (level) {
6650             if (level < 0) {
6651                 level = -level;
6652                 level = (((level << 1) + 1) * qscale *
6653                          ((int) (quant_matrix[j]))) >> 4;
6654                 level = (level - 1) | 1;
6655                 level = -level;
6656             } else {
6657                 level = (((level << 1) + 1) * qscale *
6658                          ((int) (quant_matrix[j]))) >> 4;
6659                 level = (level - 1) | 1;
6660             }
6661             block[j] = level;
6662         }
6663     }
6664 }
6665
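/**
 * MPEG-2 intra dequantization: like the MPEG-1 variant but without the
 * oddification; with alternate scan all 63 AC coefficients are processed.
 */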
6666 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6667                                    DCTELEM *block, int n, int qscale)
6668 {
6669     int i, level, nCoeffs;
6670     const uint16_t *quant_matrix;
6671
6672     if(s->alternate_scan) nCoeffs= 63;
6673     else nCoeffs= s->block_last_index[n];
6674
6675     if (n < 4)
6676         block[0] = block[0] * s->y_dc_scale;
6677     else
6678         block[0] = block[0] * s->c_dc_scale;
6679     quant_matrix = s->intra_matrix;
6680     for(i=1;i<=nCoeffs;i++) {
6681         int j= s->intra_scantable.permutated[i];
6682         level = block[j];
6683         if (level) {
6684             if (level < 0) {
6685                 level = -level;
6686                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6687                 level = -level;
6688             } else {
6689                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6690             }
6691             block[j] = level;
6692         }
6693     }
6694 }
6695
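/**
 * Bit-exact MPEG-2 intra dequantization: additionally sums the levels and
 * toggles the LSB of block[63] depending on the parity (mismatch control).
 */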
6696 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6697                                    DCTELEM *block, int n, int qscale)
6698 {
6699     int i, level, nCoeffs;
6700     const uint16_t *quant_matrix;
6701     int sum=-1;
6702
6703     if(s->alternate_scan) nCoeffs= 63;
6704     else nCoeffs= s->block_last_index[n];
6705
6706     if (n < 4)
6707         block[0] = block[0] * s->y_dc_scale;
6708     else
6709         block[0] = block[0] * s->c_dc_scale;
6710     quant_matrix = s->intra_matrix;
6711     for(i=1;i<=nCoeffs;i++) {
6712         int j= s->intra_scantable.permutated[i];
6713         level = block[j];
6714         if (level) {
6715             if (level < 0) {
6716                 level = -level;
6717                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6718                 level = -level;
6719             } else {
6720                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6721             }
6722             block[j] = level;
6723             sum+=level;
6724         }
6725     }
6726     block[63]^=sum&1;
6727 }
6728
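/**
 * MPEG-2 inter dequantization: ((2*level + 1) * qscale * quant_matrix) >> 4
 * without oddification, followed by the block[63] parity toggle for mismatch
 * control.
 */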
6729 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6730                                    DCTELEM *block, int n, int qscale)
6731 {
6732     int i, level, nCoeffs;
6733     const uint16_t *quant_matrix;
6734     int sum=-1;
6735
6736     if(s->alternate_scan) nCoeffs= 63;
6737     else nCoeffs= s->block_last_index[n];
6738
6739     quant_matrix = s->inter_matrix;
6740     for(i=0; i<=nCoeffs; i++) {
6741         int j= s->intra_scantable.permutated[i];
6742         level = block[j];
6743         if (level) {
6744             if (level < 0) {
6745                 level = -level;
6746                 level = (((level << 1) + 1) * qscale *
6747                          ((int) (quant_matrix[j]))) >> 4;
6748                 level = -level;
6749             } else {
6750                 level = (((level << 1) + 1) * qscale *
6751                          ((int) (quant_matrix[j]))) >> 4;
6752             }
6753             block[j] = level;
6754             sum+=level;
6755         }
6756     }
6757     block[63]^=sum&1; /* same parity-based mismatch control as the intra bitexact case */
6758 }
6759
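/**
 * H.263 intra dequantization: level is mapped to
 * 2*qscale*level +/- ((qscale-1)|1).  Without advanced intra coding the
 * DC coefficient is additionally scaled by the luma/chroma DC scale;
 * with AIC the DC is left untouched and no rounding offset is added.
 * When AC prediction is used all 63 AC coefficients are processed,
 * otherwise only up to the last coded coefficient in raster order.
 */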
6760 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6761                                   DCTELEM *block, int n, int qscale)
6762 {
6763     int i, level, qmul, qadd;
6764     int nCoeffs;
6765
6766     assert(s->block_last_index[n]>=0);
6767
6768     qmul = qscale << 1;
6769
6770     if (!s->h263_aic) {
6771         if (n < 4)
6772             block[0] = block[0] * s->y_dc_scale;
6773         else
6774             block[0] = block[0] * s->c_dc_scale;
6775         qadd = (qscale - 1) | 1;
6776     }else{
6777         qadd = 0;
6778     }
6779     if(s->ac_pred)
6780         nCoeffs=63;
6781     else
6782         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6783
6784     for(i=1; i<=nCoeffs; i++) {
6785         level = block[i];
6786         if (level) {
6787             if (level < 0) {
6788                 level = level * qmul - qadd;
6789             } else {
6790                 level = level * qmul + qadd;
6791             }
6792             block[i] = level;
6793         }
6794     }
6795 }
6796
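/**
 * H.263 inter dequantization: the same 2*qscale*level +/- ((qscale-1)|1)
 * mapping as the intra case, applied from coefficient 0 up to the last
 * coded coefficient in raster order.
 */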
6797 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6798                                   DCTELEM *block, int n, int qscale)
6799 {
6800     int i, level, qmul, qadd;
6801     int nCoeffs;
6802
6803     assert(s->block_last_index[n]>=0);
6804
6805     qadd = (qscale - 1) | 1;
6806     qmul = qscale << 1;
6807
6808     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6809
6810     for(i=0; i<=nCoeffs; i++) {
6811         level = block[i];
6812         if (level) {
6813             if (level < 0) {
6814                 level = level * qmul - qadd;
6815             } else {
6816                 level = level * qmul + qadd;
6817             }
6818             block[i] = level;
6819         }
6820     }
6821 }
6822
6823 #ifdef CONFIG_ENCODERS
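/* Encoder registration.  The positional initializers below fill the first
 * AVCodec members in order: name, type, id, priv_data_size, init, encode
 * and close; all of these encoders share the generic MPV_encode_* entry
 * points and keep their state in an MpegEncContext. */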
6824 AVCodec h263_encoder = {
6825     "h263",
6826     CODEC_TYPE_VIDEO,
6827     CODEC_ID_H263,
6828     sizeof(MpegEncContext),
6829     MPV_encode_init,
6830     MPV_encode_picture,
6831     MPV_encode_end,
6832     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6833 };
6834
6835 AVCodec h263p_encoder = {
6836     "h263p",
6837     CODEC_TYPE_VIDEO,
6838     CODEC_ID_H263P,
6839     sizeof(MpegEncContext),
6840     MPV_encode_init,
6841     MPV_encode_picture,
6842     MPV_encode_end,
6843     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6844 };
6845
6846 AVCodec flv_encoder = {
6847     "flv",
6848     CODEC_TYPE_VIDEO,
6849     CODEC_ID_FLV1,
6850     sizeof(MpegEncContext),
6851     MPV_encode_init,
6852     MPV_encode_picture,
6853     MPV_encode_end,
6854     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6855 };
6856
6857 AVCodec rv10_encoder = {
6858     "rv10",
6859     CODEC_TYPE_VIDEO,
6860     CODEC_ID_RV10,
6861     sizeof(MpegEncContext),
6862     MPV_encode_init,
6863     MPV_encode_picture,
6864     MPV_encode_end,
6865     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6866 };
6867
6868 AVCodec rv20_encoder = {
6869     "rv20",
6870     CODEC_TYPE_VIDEO,
6871     CODEC_ID_RV20,
6872     sizeof(MpegEncContext),
6873     MPV_encode_init,
6874     MPV_encode_picture,
6875     MPV_encode_end,
6876     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6877 };
6878
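/* Only the MPEG-4 encoder advertises CODEC_CAP_DELAY here: B-frame
 * reordering may buffer input frames, so callers must flush the encoder
 * with NULL frames at the end of the stream to obtain the remaining
 * output. */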
6879 AVCodec mpeg4_encoder = {
6880     "mpeg4",
6881     CODEC_TYPE_VIDEO,
6882     CODEC_ID_MPEG4,
6883     sizeof(MpegEncContext),
6884     MPV_encode_init,
6885     MPV_encode_picture,
6886     MPV_encode_end,
6887     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6888     .capabilities= CODEC_CAP_DELAY,
6889 };
6890
6891 AVCodec msmpeg4v1_encoder = {
6892     "msmpeg4v1",
6893     CODEC_TYPE_VIDEO,
6894     CODEC_ID_MSMPEG4V1,
6895     sizeof(MpegEncContext),
6896     MPV_encode_init,
6897     MPV_encode_picture,
6898     MPV_encode_end,
6899     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6900 };
6901
6902 AVCodec msmpeg4v2_encoder = {
6903     "msmpeg4v2",
6904     CODEC_TYPE_VIDEO,
6905     CODEC_ID_MSMPEG4V2,
6906     sizeof(MpegEncContext),
6907     MPV_encode_init,
6908     MPV_encode_picture,
6909     MPV_encode_end,
6910     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6911 };
6912
6913 AVCodec msmpeg4v3_encoder = {
6914     "msmpeg4",
6915     CODEC_TYPE_VIDEO,
6916     CODEC_ID_MSMPEG4V3,
6917     sizeof(MpegEncContext),
6918     MPV_encode_init,
6919     MPV_encode_picture,
6920     MPV_encode_end,
6921     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6922 };
6923
6924 AVCodec wmv1_encoder = {
6925     "wmv1",
6926     CODEC_TYPE_VIDEO,
6927     CODEC_ID_WMV1,
6928     sizeof(MpegEncContext),
6929     MPV_encode_init,
6930     MPV_encode_picture,
6931     MPV_encode_end,
6932     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6933 };
6934
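/* Unlike the other encoders, MJPEG accepts the JPEG full-range pixel
 * formats PIX_FMT_YUVJ420P and PIX_FMT_YUVJ422P instead of the
 * MPEG-range PIX_FMT_YUV420P. */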
6935 AVCodec mjpeg_encoder = {
6936     "mjpeg",
6937     CODEC_TYPE_VIDEO,
6938     CODEC_ID_MJPEG,
6939     sizeof(MpegEncContext),
6940     MPV_encode_init,
6941     MPV_encode_picture,
6942     MPV_encode_end,
6943     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6944 };
6945
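/* Illustrative only: a minimal sketch of driving one of these encoders
 * through the public libavcodec API of this era (after avcodec_init() and
 * avcodec_register_all()), loosely following apiexample.c.  The resolution,
 * bitrate and buffer size below are arbitrary assumptions.
 *
 *     AVCodec *codec = avcodec_find_encoder(CODEC_ID_MPEG4);
 *     AVCodecContext *c = avcodec_alloc_context();
 *     AVFrame *picture  = avcodec_alloc_frame();
 *     uint8_t outbuf[100000];
 *
 *     c->width     = 352;
 *     c->height    = 288;
 *     c->time_base = (AVRational){1, 25};
 *     c->pix_fmt   = PIX_FMT_YUV420P;
 *     c->bit_rate  = 400000;
 *
 *     if (codec && avcodec_open(c, codec) >= 0) {
 *         // fill picture->data[]/picture->linesize[] with one frame, then:
 *         int size = avcodec_encode_video(c, outbuf, sizeof(outbuf), picture);
 *         // write the first 'size' bytes of outbuf, repeat per input frame
 *         avcodec_close(c);
 *     }
 *     av_free(c);
 *     av_free(picture);
 */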
6946 #endif //CONFIG_ENCODERS