]> git.sesse.net Git - ffmpeg/blob - libavcodec/huffyuv.c
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
[ffmpeg] / libavcodec / huffyuv.c
1 /*
2  * huffyuv codec for libavcodec
3  *
4  * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
21  * the algorithm used 
22  */
23  
24 /**
25  * @file huffyuv.c
26  * huffyuv codec for libavcodec.
27  */
28
29 #include "common.h"
30 #include "avcodec.h"
31 #include "dsputil.h"
32
/* Fallback for toolchains whose headers do not provide INT64_MAX. */
#ifndef INT64_MAX
#    define INT64_MAX 9223372036854775807LL
#endif

/* Table index width handed to init_vlc() and get_vlc2() throughout this file. */
#define VLC_BITS 11
38
/**
 * Prediction modes understood by this codec.
 * The numeric values matter: decode_init() assigns the low 6 bits of the
 * first extradata byte directly to a variable of this type.
 */
typedef enum Predictor{
    LEFT   = 0,
    PLANE  = 1,
    MEDIAN = 2
} Predictor;
44  
45 typedef struct HYuvContext{
46     AVCodecContext *avctx;
47     Predictor predictor;
48     GetBitContext gb;
49     PutBitContext pb;
50     int interlaced;
51     int decorrelate;
52     int bitstream_bpp;
53     int version;
54     int yuy2;                               //use yuy2 instead of 422P
55     int bgr32;                              //use bgr32 instead of bgr24
56     int width, height;
57     int flags;
58     int picture_number;
59     int last_slice_end;
60     uint8_t __align8 temp[3][2560];
61     uint64_t stats[3][256];
62     uint8_t len[3][256];
63     uint32_t bits[3][256];
64     VLC vlc[3];
65     AVFrame picture;
66     uint8_t __align8 bitstream_buffer[1024*1024*3]; //FIXME dynamic alloc or some other solution
67     DSPContext dsp; 
68 }HYuvContext;
69
/*
 * Packed code-length table for luma; read_old_huffman_tables() feeds these
 * bytes through read_len_table() to fill len[0].
 */
static const unsigned char classic_shift_luma[] = {
     34,  36,  35,  69, 135, 232,   9,  16,  10,  24,
     11,  23,  12,  16,  13,  10,  14,   8,  15,   8,
     16,   8,  17,  20,  16,  10, 207, 206, 205, 236,
     11,   8,  10,  21,   9,  23,   8,   8, 199,  70,
     69,  68,   0
};
75
/*
 * Packed code-length table for chroma; read_old_huffman_tables() feeds these
 * bytes through read_len_table() to fill len[1].
 */
static const unsigned char classic_shift_chroma[] = {
     66,  36,  37,  38,  39,  40,  41,  75,  76,  77,
    110, 239, 144,  81,  82,  83,  84,  85, 118, 183,
     56,  57,  88,  89,  56,  89, 154,  57,  58,  57,
     26, 141,  57,  56,  58,  57,  58,  57, 184, 119,
    214, 245, 116,  83,  82,  49,  80,  79,  78,  77,
     44,  75,  41,  40,  39,  38,  37,  36,  34,   0
};
81
/*
 * Fixed luma code words; read_old_huffman_tables() copies them into bits[0].
 * Each row holds 16 entries; the leading comment is the index of the first.
 */
static const unsigned char classic_add_luma[256] = {
    /*   0 */   3,   9,   5,  12,  10,  35,  32,  29,  27,  50,  48,  45,  44,  41,  39,  37,
    /*  16 */  73,  70,  68,  65,  64,  61,  58,  56,  53,  50,  49,  46,  44,  41,  38,  36,
    /*  32 */  68,  65,  63,  61,  58,  55,  53,  51,  48,  46,  45,  43,  41,  39,  38,  36,
    /*  48 */  35,  33,  32,  30,  29,  27,  26,  25,  48,  47,  46,  44,  43,  41,  40,  39,
    /*  64 */  37,  36,  35,  34,  32,  31,  30,  28,  27,  26,  24,  23,  22,  20,  19,  37,
    /*  80 */  35,  34,  33,  31,  30,  29,  27,  26,  24,  23,  21,  20,  18,  17,  15,  29,
    /*  96 */  27,  26,  24,  22,  21,  19,  17,  16,  14,  26,  25,  23,  21,  19,  18,  16,
    /* 112 */  15,  27,  25,  23,  21,  19,  17,  16,  14,  26,  25,  23,  21,  18,  17,  14,
    /* 128 */  12,  17,  19,  13,   4,   9,   2,  11,   1,   7,   8,   0,  16,   3,  14,   6,
    /* 144 */  12,  10,   5,  15,  18,  11,  10,  13,  15,  16,  19,  20,  22,  24,  27,  15,
    /* 160 */  18,  20,  22,  24,  26,  14,  17,  20,  22,  24,  27,  15,  18,  20,  23,  25,
    /* 176 */  28,  16,  19,  22,  25,  28,  32,  36,  21,  25,  29,  33,  38,  42,  45,  49,
    /* 192 */  28,  31,  34,  37,  40,  42,  44,  47,  49,  50,  52,  54,  56,  57,  59,  60,
    /* 208 */  62,  64,  66,  67,  69,  35,  37,  39,  40,  42,  43,  45,  47,  48,  51,  52,
    /* 224 */  54,  55,  57,  59,  60,  62,  63,  66,  67,  69,  71,  72,  38,  40,  42,  43,
    /* 240 */  46,  47,  49,  51,  26,  28,  30,  31,  33,  34,  18,  19,  11,  13,   7,   8
};
100
/*
 * Fixed chroma code words; read_old_huffman_tables() copies them into bits[1].
 * Each row holds 16 entries; the leading comment is the index of the first.
 */
static const unsigned char classic_add_chroma[256] = {
    /*   0 */   3,   1,   2,   2,   2,   2,   3,   3,   7,   5,   7,   5,   8,   6,  11,   9,
    /*  16 */   7,  13,  11,  10,   9,   8,   7,   5,   9,   7,   6,   4,   7,   5,   8,   7,
    /*  32 */  11,   8,  13,  11,  19,  15,  22,  23,  20,  33,  32,  28,  27,  29,  51,  77,
    /*  48 */  43,  45,  76,  81,  46,  82,  75,  55,  56, 144,  58,  80,  60,  74, 147,  63,
    /*  64 */ 143,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
    /*  80 */  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  27,  30,  21,  22,
    /*  96 */  17,  14,   5,   6, 100,  54,  47,  50,  51,  53, 106, 107, 108, 109, 110, 111,
    /* 112 */ 112, 113, 114, 115,   4, 117, 118,  92,  94, 121, 122,   3, 124, 103,   2,   1,
    /* 128 */   0, 129, 130, 131, 120, 119, 126, 125, 136, 137, 138, 139, 140, 141, 142, 134,
    /* 144 */ 135, 132, 133, 104,  64, 101,  62,  57, 102,  95,  93,  59,  61,  28,  97,  96,
    /* 160 */  52,  49,  48,  29,  32,  25,  24,  46,  23,  98,  45,  44,  43,  20,  42,  41,
    /* 176 */  19,  18,  99,  40,  15,  39,  38,  16,  13,  12,  11,  37,  10,   9,   8,  36,
    /* 192 */   7, 128, 127, 105, 123, 116,  35,  34,  33, 145,  31,  79,  42, 146,  78,  26,
    /* 208 */  83,  48,  49,  50,  44,  47,  26,  31,  30,  18,  17,  19,  21,  24,  25,  13,
    /* 224 */  14,  16,  17,  18,  20,  21,  12,  14,  15,   9,  10,   6,   9,   6,   5,   8,
    /* 240 */   6,  12,   8,  10,   7,   9,   6,   4,   6,   2,   2,   3,   3,   3,   3,   2
};
119
/**
 * Turn a row of residuals into samples by running-sum (left) prediction.
 * @param dst receives w bytes; each is the low 8 bits of the running sum
 * @param src w residual bytes
 * @param w   number of bytes to process
 * @param acc running sum carried in from the previous call
 * @return the running sum after the last byte (not truncated to 8 bits)
 */
static inline int add_left_prediction(uint8_t *dst, uint8_t *src, int w, int acc){
    const uint8_t *in= src;
    uint8_t *out= dst;
    int remaining;

    for(remaining= w; remaining > 0; remaining--){
        acc+= *in++;
        *out++= acc;
    }

    return acc;
}
138
/**
 * Reconstruct a row using median prediction.
 * For every position the predictor is mid_pred() of the left sample, the
 * sample above (src1) and (left + above - above-left) masked to 8 bits; the
 * residual from diff is added and the result stored as a byte.
 * @param dst      output row, w bytes
 * @param src1     row above, w bytes
 * @param diff     residuals, w bytes
 * @param w        number of samples
 * @param left     in: sample left of dst[0]; out: last sample written
 * @param left_top in: sample left of src1[0]; out: last src1 sample read
 */
static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur= *left;
    uint8_t above_left= *left_top;
    int x;

    for(x=0; x<w; x++){
        const int above= src1[x];
        const int gradient= (cur + above - above_left)&0xFF;

        cur= mid_pred(cur, above, gradient) + diff[x];
        above_left= above;
        dst[x]= cur;
    }

    *left= cur;
    *left_top= above_left;
}
155
/**
 * Left prediction for 4-byte pixels: bytes 0, 1 and 2 of every pixel are
 * accumulated independently (blue, green, red); byte 3 is neither read nor
 * written.
 * @param dst   output, 4*w bytes
 * @param src   residuals, 4*w bytes
 * @param w     number of pixels
 * @param red   in/out running sum for byte 2
 * @param green in/out running sum for byte 1
 * @param blue  in/out running sum for byte 0
 */
static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
    int acc_b= *blue;
    int acc_g= *green;
    int acc_r= *red;
    int n;

    for(n=0; n<w; n++, src+=4, dst+=4){
        acc_b+= src[0];
        acc_g+= src[1];
        acc_r+= src[2];

        dst[0]= acc_b;
        dst[1]= acc_g;
        dst[2]= acc_r;
    }

    *red  = acc_r;
    *green= acc_g;
    *blue = acc_b;
}
177
178 static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int left){
179     int i;
180     if(w<32){
181         for(i=0; i<w; i++){
182             const int temp= src[i];
183             dst[i]= temp - left;
184             left= temp;
185         }
186         return left;
187     }else{
188         for(i=0; i<16; i++){
189             const int temp= src[i];
190             dst[i]= temp - left;
191             left= temp;
192         }
193         s->dsp.diff_bytes(dst+16, src+16, src+15, w-16);
194         return src[w-1];
195     }
196 }
197
198 static void read_len_table(uint8_t *dst, GetBitContext *gb){
199     int i, val, repeat;
200   
201     for(i=0; i<256;){
202         repeat= get_bits(gb, 3);
203         val   = get_bits(gb, 5);
204         if(repeat==0)
205             repeat= get_bits(gb, 8);
206 //printf("%d %d\n", val, repeat);
207         while (repeat--)
208             dst[i++] = val;
209     }
210 }
211
/**
 * Assign code words to symbols from their code lengths.
 * Lengths are visited from 32 down to 1; within one length, symbols get
 * consecutive code values in index order. After each length the counter
 * must be even (it is then halved for the next shorter length), otherwise
 * the lengths do not describe a usable code.
 * @param dst       receives one code per symbol whose length is 1..32
 * @param len_table 256 code lengths
 * @return 0 on success, -1 if the lengths are inconsistent
 */
static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
    uint32_t code= 0;
    int length= 32;

    while(length > 0){
        int sym;

        for(sym=0; sym<256; sym++){
            if(len_table[sym] != length)
                continue;
            dst[sym]= code;
            code++;
        }

        if(code & 1){
            fprintf(stderr, "Error generating huffman table\n");
            return -1;
        }
        code >>= 1;
        length--;
    }

    return 0;
}
229
/**
 * Derive Huffman code lengths from symbol statistics.
 * A tree is built by repeatedly merging the two smallest counts. If any
 * resulting length does not fit the 5-bit length field used by
 * store_table()/read_len_table() (i.e. is 32 or more), the counts are
 * flattened by a doubled offset and the tree is rebuilt.
 * @param dst   receives size code lengths
 * @param stats size symbol counts
 * @param size  number of symbols
 */
static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
    uint64_t counts[2*size];
    int up[2*size];
    int offset, i, next;

    for(offset=1; ; offset<<=1){
        for(i=0; i<size; i++){
            counts[i]= stats[i] + offset - 1;
        }

        /* nodes size..2*size-1 are the internal nodes, created in order */
        for(next=size; next<size*2; next++){
            uint64_t min1, min2;
            int min1_i, min2_i;

            min1=min2= INT64_MAX;
            min1_i= min2_i=-1;

            /* find the two smallest live counts; merged nodes are marked INT64_MAX */
            for(i=0; i<next; i++){
                if(min2 > counts[i]){
                    if(min1 > counts[i]){
                        min2= min1;
                        min2_i= min1_i;
                        min1= counts[i];
                        min1_i= i;
                    }else{
                        min2= counts[i];
                        min2_i= i;
                    }
                }
            }

            /* only one live node left: it is the root */
            if(min2==INT64_MAX) break;

            counts[next]= min1 + min2;
            counts[min1_i]=
            counts[min2_i]= INT64_MAX;
            up[min1_i]=
            up[min2_i]= next;
            up[next]= -1;
        }

        for(i=0; i<size; i++){
            int len;
            int index=i;

            /* code length = number of parent links up to the root */
            for(len=0; up[index] != -1; len++)
                index= up[index];

            /* lengths are stored in 5 bits, so 32 is already too long */
            if(len >= 32) break;

            dst[i]= len;
        }
        if(i==size) break;
    }
}
285
286 static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
287     GetBitContext gb;
288     int i;
289     
290     init_get_bits(&gb, src, length*8);
291     
292     for(i=0; i<3; i++){
293         read_len_table(s->len[i], &gb);
294         
295         if(generate_bits_table(s->bits[i], s->len[i])<0){
296             return -1;
297         }
298 #if 0
299 for(j=0; j<256; j++){
300 printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
301 }
302 #endif
303         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4);
304     }
305     
306     return 0;
307 }
308
309 static int read_old_huffman_tables(HYuvContext *s){
310 #if 1
311     GetBitContext gb;
312     int i;
313
314     init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
315     read_len_table(s->len[0], &gb);
316     init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
317     read_len_table(s->len[1], &gb);
318     
319     for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
320     for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
321
322     if(s->bitstream_bpp >= 24){
323         memcpy(s->bits[1], s->bits[0], 256*sizeof(uint32_t));
324         memcpy(s->len[1] , s->len [0], 256*sizeof(uint8_t));
325     }
326     memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t));
327     memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t));
328     
329     for(i=0; i<3; i++)
330         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4);
331     
332     return 0;
333 #else
334     fprintf(stderr, "v1 huffyuv is not supported \n");
335     return -1;
336 #endif
337 }
338
339 static int decode_init(AVCodecContext *avctx)
340 {
341     HYuvContext *s = avctx->priv_data;
342     int width, height;
343
344     s->avctx= avctx;
345     s->flags= avctx->flags;
346         
347     dsputil_init(&s->dsp, avctx);
348     
349     width= s->width= avctx->width;
350     height= s->height= avctx->height;
351     avctx->coded_frame= &s->picture;
352
353 s->bgr32=1;
354     assert(width && height);
355 //if(avctx->extradata)
356 //  printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size);
357     if(avctx->extradata_size){
358         if((avctx->bits_per_sample&7) && avctx->bits_per_sample != 12)
359             s->version=1; // do such files exist at all?
360         else
361             s->version=2;
362     }else
363         s->version=0;
364     
365     if(s->version==2){
366         int method;
367
368         method= ((uint8_t*)avctx->extradata)[0];
369         s->decorrelate= method&64 ? 1 : 0;
370         s->predictor= method&63;
371         s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1];
372         if(s->bitstream_bpp==0) 
373             s->bitstream_bpp= avctx->bits_per_sample&~7;
374             
375         if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0)
376             return -1;
377     }else{
378         switch(avctx->bits_per_sample&7){
379         case 1:
380             s->predictor= LEFT;
381             s->decorrelate= 0;
382             break;
383         case 2:
384             s->predictor= LEFT;
385             s->decorrelate= 1;
386             break;
387         case 3:
388             s->predictor= PLANE;
389             s->decorrelate= avctx->bits_per_sample >= 24;
390             break;
391         case 4:
392             s->predictor= MEDIAN;
393             s->decorrelate= 0;
394             break;
395         default:
396             s->predictor= LEFT; //OLD
397             s->decorrelate= 0;
398             break;
399         }
400         s->bitstream_bpp= avctx->bits_per_sample & ~7;
401         
402         if(read_old_huffman_tables(s) < 0)
403             return -1;
404     }
405     
406     s->interlaced= height > 288;
407     
408     switch(s->bitstream_bpp){
409     case 12:
410         avctx->pix_fmt = PIX_FMT_YUV420P;
411         break;
412     case 16:
413         if(s->yuy2){
414             avctx->pix_fmt = PIX_FMT_YUV422;
415         }else{
416             avctx->pix_fmt = PIX_FMT_YUV422P;
417         }
418         break;
419     case 24:
420     case 32:
421         if(s->bgr32){
422             avctx->pix_fmt = PIX_FMT_RGBA32;
423         }else{
424             avctx->pix_fmt = PIX_FMT_BGR24;
425         }
426         break;
427     default:
428         assert(0);
429     }
430     
431 //    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
432     
433     return 0;
434 }
435
436 static void store_table(HYuvContext *s, uint8_t *len){
437     int i;
438     int index= s->avctx->extradata_size;
439
440     for(i=0; i<256;){
441         int val= len[i];
442         int repeat=0;
443         
444         for(; i<256 && len[i]==val && repeat<255; i++)
445             repeat++;
446         
447         assert(val < 32 && val >0 && repeat<256 && repeat>0);
448         if(repeat>7){
449             ((uint8_t*)s->avctx->extradata)[index++]= val;
450             ((uint8_t*)s->avctx->extradata)[index++]= repeat;
451         }else{
452             ((uint8_t*)s->avctx->extradata)[index++]= val | (repeat<<5);
453         }
454     }
455     
456     s->avctx->extradata_size= index;
457 }
458
459 static int encode_init(AVCodecContext *avctx)
460 {
461     HYuvContext *s = avctx->priv_data;
462     int i, j, width, height;
463
464     s->avctx= avctx;
465     s->flags= avctx->flags;
466         
467     dsputil_init(&s->dsp, avctx);
468     
469     width= s->width= avctx->width;
470     height= s->height= avctx->height;
471     
472     assert(width && height);
473     
474     avctx->extradata= av_mallocz(1024*30);
475     avctx->stats_out= av_mallocz(1024*30);
476     s->version=2;
477     
478     avctx->coded_frame= &s->picture;
479     
480     switch(avctx->pix_fmt){
481     case PIX_FMT_YUV420P:
482         if(avctx->strict_std_compliance>=0){
483             fprintf(stderr, "YV12-huffyuv is experimental, there WILL be no compatbility! (use (v)strict=-1)\n");
484             return -1;
485         }
486         s->bitstream_bpp= 12;
487         break;
488     case PIX_FMT_YUV422P:
489         s->bitstream_bpp= 16;
490         break;
491     default:
492         fprintf(stderr, "format not supported\n");
493         return -1;
494     }
495     avctx->bits_per_sample= s->bitstream_bpp;
496     s->decorrelate= s->bitstream_bpp >= 24;
497     s->predictor= avctx->prediction_method;
498     
499     ((uint8_t*)avctx->extradata)[0]= s->predictor;
500     ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp;
501     ((uint8_t*)avctx->extradata)[2]=
502     ((uint8_t*)avctx->extradata)[3]= 0;
503     s->avctx->extradata_size= 4;
504     
505     if(avctx->stats_in){
506         char *p= avctx->stats_in;
507     
508         for(i=0; i<3; i++)
509             for(j=0; j<256; j++)
510                 s->stats[i][j]= 1;
511
512         for(;;){
513             for(i=0; i<3; i++){
514                 char *next;
515
516                 for(j=0; j<256; j++){
517                     s->stats[i][j]+= strtol(p, &next, 0);
518                     if(next==p) return -1;
519                     p=next;
520                 }        
521             }
522             if(p[0]==0 || p[1]==0 || p[2]==0) break;
523         }
524     }else{
525         for(i=0; i<3; i++)
526             for(j=0; j<256; j++){
527                 int d= FFMIN(j, 256-j);
528                 
529                 s->stats[i][j]= 100000000/(d+1);
530             }
531     }
532     
533     for(i=0; i<3; i++){
534         generate_len_table(s->len[i], s->stats[i], 256);
535
536         if(generate_bits_table(s->bits[i], s->len[i])<0){
537             return -1;
538         }
539         
540         store_table(s, s->len[i]);
541     }
542
543     for(i=0; i<3; i++)
544         for(j=0; j<256; j++)
545             s->stats[i][j]= 0;
546     
547     s->interlaced= height > 288;
548
549 //    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
550
551     s->picture_number=0;
552
553     return 0;
554 }
555
556 static void decode_422_bitstream(HYuvContext *s, int count){
557     int i;
558
559     count/=2;
560     
561     for(i=0; i<count; i++){
562         s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
563         s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
564         s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
565         s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
566     }
567 }
568
569 static void decode_gray_bitstream(HYuvContext *s, int count){
570     int i;
571     
572     count/=2;
573     
574     for(i=0; i<count; i++){
575         s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
576         s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
577     }
578 }
579
580 static void encode_422_bitstream(HYuvContext *s, int count){
581     int i;
582     
583     count/=2;
584     if(s->flags&CODEC_FLAG_PASS1){
585         for(i=0; i<count; i++){
586             s->stats[0][ s->temp[0][2*i  ] ]++;
587             s->stats[1][ s->temp[1][  i  ] ]++;
588             s->stats[0][ s->temp[0][2*i+1] ]++;
589             s->stats[2][ s->temp[2][  i  ] ]++;
590         }
591     }else{
592         for(i=0; i<count; i++){
593             put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
594             put_bits(&s->pb, s->len[1][ s->temp[1][  i  ] ], s->bits[1][ s->temp[1][  i  ] ]);
595             put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
596             put_bits(&s->pb, s->len[2][ s->temp[2][  i  ] ], s->bits[2][ s->temp[2][  i  ] ]);
597         }
598     }
599 }
600
601 static void encode_gray_bitstream(HYuvContext *s, int count){
602     int i;
603     
604     count/=2;
605     if(s->flags&CODEC_FLAG_PASS1){
606         for(i=0; i<count; i++){
607             s->stats[0][ s->temp[0][2*i  ] ]++;
608             s->stats[0][ s->temp[0][2*i+1] ]++;
609         }
610     }else{
611         for(i=0; i<count; i++){
612             put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
613             put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
614         }
615     }
616 }
617
618 static void decode_bgr_bitstream(HYuvContext *s, int count){
619     int i;
620
621     if(s->decorrelate){
622         if(s->bitstream_bpp==24){
623             for(i=0; i<count; i++){
624                 s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
625                 s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1];
626                 s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1];
627             }
628         }else{
629             for(i=0; i<count; i++){
630                 s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
631                 s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1];
632                 s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1]; 
633                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
634             }
635         }
636     }else{
637         if(s->bitstream_bpp==24){
638             for(i=0; i<count; i++){
639                 s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
640                 s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
641                 s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
642             }
643         }else{
644             for(i=0; i<count; i++){
645                 s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
646                 s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
647                 s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
648                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
649             }
650         }
651     }
652 }
653
654 static void draw_slice(HYuvContext *s, int y){
655     int h, cy;
656     int offset[4];
657     
658     if(s->avctx->draw_horiz_band==NULL) 
659         return;
660         
661     h= y - s->last_slice_end;
662     y -= h;
663     
664     if(s->bitstream_bpp==12){
665         cy= y>>1;
666     }else{
667         cy= y;
668     }
669
670     offset[0] = s->picture.linesize[0]*y;
671     offset[1] = s->picture.linesize[1]*cy;
672     offset[2] = s->picture.linesize[2]*cy;
673     offset[3] = 0;
674     emms_c();
675
676     s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h);
677     
678     s->last_slice_end= y + h;
679 }
680
681 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
682     HYuvContext *s = avctx->priv_data;
683     const int width= s->width;
684     const int width2= s->width>>1;
685     const int height= s->height;
686     int fake_ystride, fake_ustride, fake_vstride;
687     AVFrame * const p= &s->picture;
688
689     AVFrame *picture = data;
690
691     *data_size = 0;
692
693     /* no supplementary picture */
694     if (buf_size == 0)
695         return 0;
696
697     s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
698     
699     init_get_bits(&s->gb, s->bitstream_buffer, buf_size*8);
700
701     if(p->data[0])
702         avctx->release_buffer(avctx, p);
703
704     p->reference= 0;
705     if(avctx->get_buffer(avctx, p) < 0){
706         fprintf(stderr, "get_buffer() failed\n");
707         return -1;
708     }
709
710     fake_ystride= s->interlaced ? p->linesize[0]*2  : p->linesize[0];
711     fake_ustride= s->interlaced ? p->linesize[1]*2  : p->linesize[1];
712     fake_vstride= s->interlaced ? p->linesize[2]*2  : p->linesize[2];
713     
714     s->last_slice_end= 0;
715         
716     if(s->bitstream_bpp<24){
717         int y, cy;
718         int lefty, leftu, leftv;
719         int lefttopy, lefttopu, lefttopv;
720         
721         if(s->yuy2){
722             p->data[0][3]= get_bits(&s->gb, 8);
723             p->data[0][2]= get_bits(&s->gb, 8);
724             p->data[0][1]= get_bits(&s->gb, 8);
725             p->data[0][0]= get_bits(&s->gb, 8);
726             
727             fprintf(stderr, "YUY2 output isnt implemenetd yet\n");
728             return -1;
729         }else{
730         
731             leftv= p->data[2][0]= get_bits(&s->gb, 8);
732             lefty= p->data[0][1]= get_bits(&s->gb, 8);
733             leftu= p->data[1][0]= get_bits(&s->gb, 8);
734                    p->data[0][0]= get_bits(&s->gb, 8);
735         
736             switch(s->predictor){
737             case LEFT:
738             case PLANE:
739                 decode_422_bitstream(s, width-2);
740                 lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
741                 if(!(s->flags&CODEC_FLAG_GRAY)){
742                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
743                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
744                 }
745
746                 for(cy=y=1; y<s->height; y++,cy++){
747                     uint8_t *ydst, *udst, *vdst;
748                     
749                     if(s->bitstream_bpp==12){
750                         decode_gray_bitstream(s, width);
751                     
752                         ydst= p->data[0] + p->linesize[0]*y;
753
754                         lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
755                         if(s->predictor == PLANE){
756                             if(y>s->interlaced)
757                                 s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
758                         }
759                         y++;
760                         if(y>=s->height) break;
761                     }
762                     
763                     draw_slice(s, y);
764                     
765                     ydst= p->data[0] + p->linesize[0]*y;
766                     udst= p->data[1] + p->linesize[1]*cy;
767                     vdst= p->data[2] + p->linesize[2]*cy;
768                     
769                     decode_422_bitstream(s, width);
770                     lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
771                     if(!(s->flags&CODEC_FLAG_GRAY)){
772                         leftu= add_left_prediction(udst, s->temp[1], width2, leftu);
773                         leftv= add_left_prediction(vdst, s->temp[2], width2, leftv);
774                     }
775                     if(s->predictor == PLANE){
776                         if(cy>s->interlaced){
777                             s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
778                             if(!(s->flags&CODEC_FLAG_GRAY)){
779                                 s->dsp.add_bytes(udst, udst - fake_ustride, width2);
780                                 s->dsp.add_bytes(vdst, vdst - fake_vstride, width2);
781                             }
782                         }
783                     }
784                 }
785                 draw_slice(s, height);
786                 
787                 break;
788             case MEDIAN:
789                 /* first line except first 2 pixels is left predicted */
790                 decode_422_bitstream(s, width-2);
791                 lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
792                 if(!(s->flags&CODEC_FLAG_GRAY)){
793                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
794                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
795                 }
796                 
797                 cy=y=1;
798                 
799                 /* second line is left predicted for interlaced case */
800                 if(s->interlaced){
801                     decode_422_bitstream(s, width);
802                     lefty= add_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty);
803                     if(!(s->flags&CODEC_FLAG_GRAY)){
804                         leftu= add_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
805                         leftv= add_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
806                     }
807                     y++; cy++;
808                 }
809
810                 /* next 4 pixels are left predicted too */
811                 decode_422_bitstream(s, 4);
812                 lefty= add_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty);
813                 if(!(s->flags&CODEC_FLAG_GRAY)){
814                     leftu= add_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
815                     leftv= add_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
816                 }
817
818                 /* next line except the first 4 pixels is median predicted */
819                 lefttopy= p->data[0][3];
820                 decode_422_bitstream(s, width-4);
821                 add_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy);
822                 if(!(s->flags&CODEC_FLAG_GRAY)){
823                     lefttopu= p->data[1][1];
824                     lefttopv= p->data[2][1];
825                     add_median_prediction(p->data[1] + fake_ustride+2, p->data[1]+2, s->temp[1], width2-2, &leftu, &lefttopu);
826                     add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv);
827                 }
828                 y++; cy++;
829                 
830                 for(; y<height; y++,cy++){
831                     uint8_t *ydst, *udst, *vdst;
832
833                     if(s->bitstream_bpp==12){
834                         while(2*cy > y){
835                             decode_gray_bitstream(s, width);
836                             ydst= p->data[0] + p->linesize[0]*y;
837                             add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
838                             y++;
839                         }
840                         if(y>=height) break;
841                     }
842                     draw_slice(s, y);
843
844                     decode_422_bitstream(s, width);
845
846                     ydst= p->data[0] + p->linesize[0]*y;
847                     udst= p->data[1] + p->linesize[1]*cy;
848                     vdst= p->data[2] + p->linesize[2]*cy;
849
850                     add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
851                     if(!(s->flags&CODEC_FLAG_GRAY)){
852                         add_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu);
853                         add_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv);
854                     }
855                 }
856
857                 draw_slice(s, height);
858                 break;
859             }
860         }
861     }else{
862         int y;
863         int leftr, leftg, leftb;
864         const int last_line= (height-1)*p->linesize[0];
865         
866         if(s->bitstream_bpp==32){
867                    p->data[0][last_line+3]= get_bits(&s->gb, 8);
868             leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8);
869             leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8);
870             leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8);
871         }else{
872             leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8);
873             leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8);
874             leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8);
875             skip_bits(&s->gb, 8);
876         }
877         
878         if(s->bgr32){
879             switch(s->predictor){
880             case LEFT:
881             case PLANE:
882                 decode_bgr_bitstream(s, width-1);
883                 add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
884
885                 for(y=s->height-2; y>=0; y--){ //yes its stored upside down
886                     decode_bgr_bitstream(s, width);
887                     
888                     add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
889                     if(s->predictor == PLANE){
890                         if((y&s->interlaced)==0){
891                             s->dsp.add_bytes(p->data[0] + p->linesize[0]*y, 
892                                              p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride);
893                         }
894                     }
895                 }
896                 draw_slice(s, height); // just 1 large slice as this isnt possible in reverse order
897                 break;
898             default:
899                 fprintf(stderr, "prediction type not supported!\n");
900             }
901         }else{
902
903             fprintf(stderr, "BGR24 output isnt implemenetd yet\n");
904             return -1;
905         }
906     }
907     emms_c();
908     
909     *picture= *p;
910     *data_size = sizeof(AVFrame);
911     
912     return (get_bits_count(&s->gb)+31)/32*4;
913 }
914
915 static int decode_end(AVCodecContext *avctx)
916 {
917     HYuvContext *s = avctx->priv_data;
918     int i;
919     
920     for(i=0; i<3; i++){
921         free_vlc(&s->vlc[i]);
922     }
923     
924     avcodec_default_free_buffers(avctx);
925
926     return 0;
927 }
928
929 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
930     HYuvContext *s = avctx->priv_data;
931     AVFrame *pict = data;
932     const int width= s->width;
933     const int width2= s->width>>1;
934     const int height= s->height;
935     const int fake_ystride= s->interlaced ? pict->linesize[0]*2  : pict->linesize[0];
936     const int fake_ustride= s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
937     const int fake_vstride= s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
938     AVFrame * const p= &s->picture;
939     int i, size;
940
941     init_put_bits(&s->pb, buf, buf_size);
942     
943     *p = *pict;
944     p->pict_type= FF_I_TYPE;
945     p->key_frame= 1;
946     
947     if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){
948         int lefty, leftu, leftv, y, cy;
949
950         put_bits(&s->pb, 8, leftv= p->data[2][0]);
951         put_bits(&s->pb, 8, lefty= p->data[0][1]);
952         put_bits(&s->pb, 8, leftu= p->data[1][0]);
953         put_bits(&s->pb, 8,        p->data[0][0]);
954         
955         lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
956         leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
957         leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
958         
959         encode_422_bitstream(s, width-2);
960         
961         if(s->predictor==MEDIAN){
962             int lefttopy, lefttopu, lefttopv;
963             cy=y=1;
964             if(s->interlaced){
965                 lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty);
966                 leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu);
967                 leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv);
968         
969                 encode_422_bitstream(s, width);
970                 y++; cy++;
971             }
972             
973             lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty);
974             leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ystride, 2, leftu);
975             leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_ystride, 2, leftv);
976         
977             encode_422_bitstream(s, 4);
978
979             lefttopy= p->data[0][3];
980             lefttopu= p->data[1][1];
981             lefttopv= p->data[2][1];
982             s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy);
983             s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu);
984             s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv);
985             encode_422_bitstream(s, width-4);
986             y++; cy++;
987
988             for(; y<height; y++,cy++){
989                 uint8_t *ydst, *udst, *vdst;
990                     
991                 if(s->bitstream_bpp==12){
992                     while(2*cy > y){
993                         ydst= p->data[0] + p->linesize[0]*y;
994                         s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
995                         encode_gray_bitstream(s, width);
996                         y++;
997                     }
998                     if(y>=height) break;
999                 }
1000                 ydst= p->data[0] + p->linesize[0]*y;
1001                 udst= p->data[1] + p->linesize[1]*cy;
1002                 vdst= p->data[2] + p->linesize[2]*cy;
1003
1004                 s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1005                 s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
1006                 s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);
1007
1008                 encode_422_bitstream(s, width);
1009             }
1010         }else{
1011             for(cy=y=1; y<height; y++,cy++){
1012                 uint8_t *ydst, *udst, *vdst;
1013                 
1014                 /* encode a luma only line & y++ */
1015                 if(s->bitstream_bpp==12){
1016                     ydst= p->data[0] + p->linesize[0]*y;
1017
1018                     if(s->predictor == PLANE && s->interlaced < y){
1019                         s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
1020
1021                         lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
1022                     }else{
1023                         lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
1024                     }
1025                     encode_gray_bitstream(s, width);
1026                     y++;
1027                     if(y>=height) break;
1028                 }
1029                 
1030                 ydst= p->data[0] + p->linesize[0]*y;
1031                 udst= p->data[1] + p->linesize[1]*cy;
1032                 vdst= p->data[2] + p->linesize[2]*cy;
1033
1034                 if(s->predictor == PLANE && s->interlaced < cy){
1035                     s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
1036                     s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2);
1037                     s->dsp.diff_bytes(s->temp[2] + 1250, vdst, vdst - fake_vstride, width2);
1038
1039                     lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
1040                     leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu);
1041                     leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + 1250, width2, leftv);
1042                 }else{
1043                     lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
1044                     leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu);
1045                     leftv= sub_left_prediction(s, s->temp[2], vdst, width2, leftv);
1046                 }
1047
1048                 encode_422_bitstream(s, width);
1049             }
1050         }        
1051     }else{
1052         fprintf(stderr, "Format not supported!\n");
1053     }
1054     emms_c();
1055     
1056     size= (get_bit_count(&s->pb)+31)/32;
1057     
1058     if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){
1059         int j;
1060         char *p= avctx->stats_out;
1061         for(i=0; i<3; i++){
1062             for(j=0; j<256; j++){
1063                 sprintf(p, "%llu ", s->stats[i][j]);
1064                 p+= strlen(p);
1065                 s->stats[i][j]= 0;
1066             }
1067             sprintf(p, "\n");
1068             p++;
1069         }
1070     }else{
1071         flush_put_bits(&s->pb);
1072         s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
1073     }
1074     
1075     s->picture_number++;
1076
1077     return size*4;
1078 }
1079
1080 static int encode_end(AVCodecContext *avctx)
1081 {
1082 //    HYuvContext *s = avctx->priv_data;
1083
1084     av_freep(&avctx->extradata);
1085     av_freep(&avctx->stats_out);
1086     
1087     return 0;
1088 }
1089
1090 static const AVOption huffyuv_options[] =
1091 {
1092     AVOPTION_CODEC_INT("prediction_method", "prediction_method", prediction_method, 0, 2, 0),
1093     AVOPTION_END()
1094 };
1095
1096 AVCodec huffyuv_decoder = {
1097     "huffyuv",
1098     CODEC_TYPE_VIDEO,
1099     CODEC_ID_HUFFYUV,
1100     sizeof(HYuvContext),
1101     decode_init,
1102     NULL,
1103     decode_end,
1104     decode_frame,
1105     CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
1106     NULL
1107 };
1108
1109 #ifdef CONFIG_ENCODERS
1110
1111 AVCodec huffyuv_encoder = {
1112     "huffyuv",
1113     CODEC_TYPE_VIDEO,
1114     CODEC_ID_HUFFYUV,
1115     sizeof(HYuvContext),
1116     encode_init,
1117     encode_frame,
1118     encode_end,
1119     .options = huffyuv_options,
1120 };
1121
1122 #endif //CONFIG_ENCODERS