git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
  36 #define BLOCK_TYPE_VLC_BITS 5
  37 #define ACDC_VLC_BITS 9
  38
  39 #define CFRAME_BUFFER_COUNT 100
  40
  /*
   * Description of the block-type VLCs, indexed [table set][size class][symbol].
   * Each entry is a pair whose first element is handed to init_vlc() as the
   * code value and whose second element is handed to it as the code length
   * (see init_vlcs()).  Seven symbols (0..6) are described per size class; the
   * size class is the value looked up in size2index[] by decode_p_block(),
   * which also picks the table set as 1-(version>1).
   * Entries of the form {0,0} presumably mark symbols that cannot occur for
   * that size class -- TODO confirm against init_vlc() semantics.
   */
  41 static const uint8_t block_type_tab[2][4][8][2]={
  42  {
  43   {   //{8,4,2}x{8,4,2}
  44     { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
  45   },{ //{8,4}x1
  46     { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
  47   },{ //1x{8,4}
  48     { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
  49   },{ //1x2, 2x1
  50     { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
  51   }
  52  },{
  53   {  //{8,4,2}x{8,4,2}
  54     { 1,2}, { 4,3}, { 5,3}, {0,2}, {6,3}, {7,3}, {0,0}
  55   },{//{8,4}x1
  56     { 1,2}, { 0,0}, { 2,2}, {0,2}, {6,3}, {7,3}, {0,0}
  57   },{//1x{8,4}
  58     { 1,2}, { 2,2}, { 0,0}, {0,2}, {6,3}, {7,3}, {0,0}
  59   },{//1x2, 2x1
  60     { 1,2}, { 0,0}, { 0,0}, {0,2}, {2,2}, {6,3}, {7,3}
  61   }
  62  }
  63 };
  64
  /*
   * Maps a block size to the size class used to select a block-type VLC.
   * Indexed as size2index[log2h][log2w] by decode_p_block().
   * The [0][0] entry is written as -1 but, the element type being uint8_t,
   * is stored as 255; it corresponds to log2h == 0 and log2w == 0.
   * NOTE(review): that entry looks like an "invalid" marker that is never
   * expected to be looked up -- confirm.
   */
  65 static const uint8_t size2index[4][4]={
  66   {-1, 3, 1, 1},
  67   { 3, 0, 0, 0},
  68   { 2, 0, 0, 0},
  69   { 2, 0, 0, 0},
  70 };
  71
  /*
   * Motion vector table used when version > 1: entry i is a pair that
   * init_mv() turns into the pixel offset  mv[i][0] + mv[i][1]*linesize/2,
   * i.e. element 0 is the horizontal and element 1 the vertical displacement.
   * The index i is the byte read from the bytestream in decode_p_block().
   */
  72 static const int8_t mv[256][2]={
  73 {  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
  74 {  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
  75 {  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
  76 {  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
  77 {  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
  78 {  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
  79 {  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
  80 { -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
  81 {  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
  82 { -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
  83 { -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
  84 { -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
  85 {  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
  86 {  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
  87 {  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
  88 { -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
  89 { -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
  90 { 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
  91 { 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
  92 {  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
  93 {-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
  94 { -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
  95 { 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
  96 {  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
  97 { -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
  98 {  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
  99 { 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
 100 { 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
 101 { 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
 102 {-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
 103 {-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
 104 {-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
 105 };
 106
 /*
  * Dequantization factors for the 64 coefficients of an 8x8 block.
  * decode_i_block() multiplies the DC value by entry 0 and each AC level by
  * dequant_table[j], where j = ff_zigzag_direct[i] is the de-zigzagged
  * position, so the table is addressed by position inside the block rather
  * than by scan order.
  */
 107 // this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
 108 static const uint8_t dequant_table[64]={
 109  16, 15, 13, 19, 24, 31, 28, 17,
 110  17, 23, 25, 31, 36, 63, 45, 21,
 111  18, 24, 27, 37, 52, 59, 49, 20,
 112  16, 28, 34, 40, 60, 80, 51, 20,
 113  18, 31, 48, 66, 68, 86, 56, 21,
 114  19, 38, 56, 59, 64, 64, 48, 20,
 115  27, 48, 55, 55, 56, 51, 35, 15,
 116  20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
 /*
  * Block-type VLC decoders, filled in by init_vlcs() from block_type_tab.
  * decode_p_block() selects one as block_type_vlc[1-(version>1)][size class].
  */
 119 static VLC block_type_vlc[2][4];
 120
 121
 /*
  * Reassembly buffer for one frame delivered as several "cfrm" chunks.
  * decode_frame() appends each chunk's payload to the buffer whose id matches
  * and decodes the frame once size reaches the announced total.
  */
 122 typedef struct CFrameBuffer{
 123     unsigned int allocated_size; /* capacity of data, maintained by av_fast_realloc() */
 124     unsigned int size;           /* payload bytes collected so far; 0 marks the slot as free */
 125     int id;                      /* frame id taken from the chunk header; reset to 0 on completion */
 126     uint8_t *data;               /* collected payload; freed in decode_end() */
 127 }CFrameBuffer;
 128
 /*
  * Private decoder state, stored in AVCodecContext.priv_data.
  */
 129 typedef struct FourXContext{
 130     AVCodecContext *avctx;          /* owning codec context, set by common_init() */
 131     DSPContext dsp;                 /* provides bswap_buf() and clear_blocks() */
 132     AVFrame current_picture, last_picture; /* swapped at the start of every decoded frame */
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;               /* P-frames: block-type codes; I-frames: coefficient bits */
 135     const uint8_t *bytestream;      /* P-frames: motion vector indices, one byte each */
 136     const uint8_t *bytestream_end;
 137     const uint16_t *wordstream;     /* P-frames: little-endian 16-bit literals (dc offsets, raw pixels) */
 138     const uint16_t *wordstream_end;
 139     int mv[256];                    /* motion vector index -> pixel offset, rebuilt by init_mv() */
 140     VLC pre_vlc;                    /* prefix code built by read_huffman_tables() */
 141     int last_dc;                    /* DC predictor, reset to 0 at the start of decode_i_frame() */
 142     DECLARE_ALIGNED(16, DCTELEM, block)[6][64]; /* one macroblock: blocks 0-3 plus blocks 4 and 5 read as cb/cr in idct_put() */
 143     void *bitstream_buffer;         /* byte-swapped copy of the bitstream, grown with av_fast_malloc() */
 144     unsigned int bitstream_buffer_size;
 145     int version;                    /* upper 16 bits of the 32-bit little-endian extradata word */
 146     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; /* reassembly slots for "cfrm" chunks */
 147 } FourXContext;
 148
 149
 /*
  * Fixed-point constants for idct(): each FIX_x value is x scaled by 65536
  * (e.g. 92682 / 65536 = 1.414213...).  MULTIPLY() forms the product and
  * shifts right by 16 to drop that scale again.
  */
 150 #define FIX_1_082392200  70936
 151 #define FIX_1_414213562  92682
 152 #define FIX_1_847759065 121095
 153 #define FIX_2_613125930 171254
 154
 155 #define MULTIPLY(var,const)  (((var)*(const)) >> 16)
 156
 /*
  * In-place inverse transform of one 8x8 coefficient block.
  *
  * The first loop processes the eight columns of block[] (elements i, 8+i,
  * 16+i, ...) and stores the intermediate result in temp[].  The second loop
  * applies the same butterfly to the eight rows of temp[] and writes the
  * result back to block[], shifted right by 6.
  *
  * NOTE(review): the butterfly structure and the comment on dequant_table
  * suggest this is the AAN fast IDCT with the scale factors folded into the
  * dequantizer -- confirm.
  */
 157 static void idct(DCTELEM block[64]){
 158     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 159     int tmp10, tmp11, tmp12, tmp13;
 160     int z5, z10, z11, z12, z13;
 161     int i;
 162     int temp[64];
 163
     /* pass 1: columns of block[] -> temp[] */
 164     for(i=0; i<8; i++){
         /* even part: coefficients 0, 2, 4, 6 of the column */
 165         tmp10 = block[8*0 + i] + block[8*4 + i];
 166         tmp11 = block[8*0 + i] - block[8*4 + i];
 167
 168         tmp13 =          block[8*2 + i] + block[8*6 + i];
 169         tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;
 170
 171         tmp0 = tmp10 + tmp13;
 172         tmp3 = tmp10 - tmp13;
 173         tmp1 = tmp11 + tmp12;
 174         tmp2 = tmp11 - tmp12;
 175
         /* odd part: coefficients 1, 3, 5, 7 of the column */
 176         z13 = block[8*5 + i] + block[8*3 + i];
 177         z10 = block[8*5 + i] - block[8*3 + i];
 178         z11 = block[8*1 + i] + block[8*7 + i];
 179         z12 = block[8*1 + i] - block[8*7 + i];
 180
 181         tmp7  =          z11 + z13;
 182         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 183
 184         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 185         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 186         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 187
 188         tmp6 = tmp12 - tmp7;
 189         tmp5 = tmp11 - tmp6;
 190         tmp4 = tmp10 + tmp5;
 191
         /* combine even and odd parts */
 192         temp[8*0 + i] = tmp0 + tmp7;
 193         temp[8*7 + i] = tmp0 - tmp7;
 194         temp[8*1 + i] = tmp1 + tmp6;
 195         temp[8*6 + i] = tmp1 - tmp6;
 196         temp[8*2 + i] = tmp2 + tmp5;
 197         temp[8*5 + i] = tmp2 - tmp5;
 198         temp[8*4 + i] = tmp3 + tmp4;
 199         temp[8*3 + i] = tmp3 - tmp4;
 200     }
 201
     /* pass 2: rows of temp[] -> block[]; i is the offset of the row start */
 202     for(i=0; i<8*8; i+=8){
 203         tmp10 = temp[0 + i] + temp[4 + i];
 204         tmp11 = temp[0 + i] - temp[4 + i];
 205
 206         tmp13 = temp[2 + i] + temp[6 + i];
 207         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 208
 209         tmp0 = tmp10 + tmp13;
 210         tmp3 = tmp10 - tmp13;
 211         tmp1 = tmp11 + tmp12;
 212         tmp2 = tmp11 - tmp12;
 213
 214         z13 = temp[5 + i] + temp[3 + i];
 215         z10 = temp[5 + i] - temp[3 + i];
 216         z11 = temp[1 + i] + temp[7 + i];
 217         z12 = temp[1 + i] - temp[7 + i];
 218
 219         tmp7 = z11 + z13;
 220         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 221
 222         z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
 223         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 224         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 225
 226         tmp6 = tmp12 - tmp7;
 227         tmp5 = tmp11 - tmp6;
 228         tmp4 = tmp10 + tmp5;
 229
         /* final outputs are scaled down by 64 */
 230         block[0 + i] = (tmp0 + tmp7)>>6;
 231         block[7 + i] = (tmp0 - tmp7)>>6;
 232         block[1 + i] = (tmp1 + tmp6)>>6;
 233         block[6 + i] = (tmp1 - tmp6)>>6;
 234         block[2 + i] = (tmp2 + tmp5)>>6;
 235         block[5 + i] = (tmp2 - tmp5)>>6;
 236         block[4 + i] = (tmp3 + tmp4)>>6;
 237         block[3 + i] = (tmp3 - tmp4)>>6;
 238     }
 239 }
 240
 241 static av_cold void init_vlcs(FourXContext *f){
 242     static VLC_TYPE table[8][32][2];
 243     int i;
 244
 245     for(i=0; i<8; i++){
 246         block_type_vlc[0][i].table= table[i];
 247         block_type_vlc[0][i].table_allocated= 32;
 248         init_vlc(&block_type_vlc[0][i], BLOCK_TYPE_VLC_BITS, 7,
 249                  &block_type_tab[0][i][0][1], 2, 1,
 250                  &block_type_tab[0][i][0][0], 2, 1, INIT_VLC_USE_NEW_STATIC);
 251     }
 252 }
 253
 254 static void init_mv(FourXContext *f){
 255     int i;
 256
 257     for(i=0; i<256; i++){
 258         if(f->version>1)
 259             f->mv[i] = mv[i][0]   + mv[i][1]  *f->current_picture.linesize[0]/2;
 260         else
 261             f->mv[i] = (i&15) - 8 + ((i>>4)-8)*f->current_picture.linesize[0]/2;
 262     }
 263 }
 264
 /*
  * LE_CENTRIC_MUL(dst, src, scale, dc): read 32 bits from src, compute
  * value*scale + dc on the whole 32-bit word and store the result to dst.
  * mcdc() uses it to process two adjacent 16-bit pixels per invocation and
  * passes a dc that has been replicated into both halves (dc * 0x10001).
  * On big-endian hosts the two 16-bit halves are exchanged before and after
  * the arithmetic, so the computation is carried out on the same word layout
  * as on little-endian hosts.
  * NOTE(review): AV_WN32A presumably requires dst to be 32-bit aligned --
  * confirm against libavutil/intreadwrite.h.
  */
 265 #if HAVE_BIGENDIAN
 266 #define LE_CENTRIC_MUL(dst, src, scale, dc) \
 267     { \
 268         unsigned tmpval = AV_RN32(src);                 \
 269         tmpval = (tmpval <<  16) | (tmpval >>  16);     \
 270         tmpval = tmpval * (scale) + (dc);               \
 271         tmpval = (tmpval <<  16) | (tmpval >>  16);     \
 272         AV_WN32A(dst, tmpval);                          \
 273     }
 274 #else
 275 #define LE_CENTRIC_MUL(dst, src, scale, dc) \
 276     { \
 277         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 278         AV_WN32A(dst, tmpval);                           \
 279     }
 280 #endif
 281
 /**
  * Write a block of (1<<log2w) x h pixels to dst, each computed as
  * scale * source pixel + dc.
  *
  * @param dst    top-left destination pixel
  * @param src    top-left source pixel
  * @param log2w  log2 of the block width, 0..3
  * @param h      block height in rows
  * @param stride distance between rows, in pixels, for both dst and src
  * @param scale  multiplier for the source pixel; when it is 0 the source
  *               pointer is not advanced from row to row
  * @param dc     value added to every pixel
  */
 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, unsigned dc){
     int row, col;

     /* replicate dc into both 16-bit halves for the two-pixel macro */
     dc *= 0x10001;

     if(log2w < 0 || log2w > 3){
         assert(0);
         return;
     }

     for(row=0; row<h; row++){
         if(log2w == 0){
             /* single pixel per row */
             dst[0] = scale*src[0] + dc;
         }else{
             /* two pixels at a time across the row */
             for(col=0; col < (1<<log2w); col+=2){
                 LE_CENTRIC_MUL(dst + col, src + col, scale, dc);
             }
         }
         if(scale)
             src += stride;
         dst += stride;
     }
 }
 322
 323 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
 324     const int index= size2index[log2h][log2w];
 325     const int h= 1<<log2h;
 326     int code= get_vlc2(&f->gb, block_type_vlc[1-(f->version>1)][index].table, BLOCK_TYPE_VLC_BITS, 1);
 327     uint16_t *start= (uint16_t*)f->last_picture.data[0];
 328     uint16_t *end= start + stride*(f->avctx->height-h+1) - (1<<log2w);
 329
 330     assert(code>=0 && code<=6);
 331
 332     if(code == 0){
 333         if (f->bytestream_end - f->bytestream < 1){
 334             av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
 335             return;
 336         }
 337         src += f->mv[ *f->bytestream++ ];
 338         if(start > src || src > end){
 339             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 340             return;
 341         }
 342         mcdc(dst, src, log2w, h, stride, 1, 0);
 343     }else if(code == 1){
 344         log2h--;
 345         decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
 346         decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
 347     }else if(code == 2){
 348         log2w--;
 349         decode_p_block(f, dst             , src             , log2w, log2h, stride);
 350         decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
 351     }else if(code == 3 && f->version<2){
 352         mcdc(dst, src, log2w, h, stride, 1, 0);
 353     }else if(code == 4){
 354         if (f->bytestream_end - f->bytestream < 1){
 355             av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
 356             return;
 357         }
 358         src += f->mv[ *f->bytestream++ ];
 359         if(start > src || src > end){
 360             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 361             return;
 362         }
 363         if (f->wordstream_end - f->wordstream < 1){
 364             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 365             return;
 366         }
 367         mcdc(dst, src, log2w, h, stride, 1, av_le2ne16(*f->wordstream++));
 368     }else if(code == 5){
 369         if (f->wordstream_end - f->wordstream < 1){
 370             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 371             return;
 372         }
 373         mcdc(dst, src, log2w, h, stride, 0, av_le2ne16(*f->wordstream++));
 374     }else if(code == 6){
 375         if (f->wordstream_end - f->wordstream < 2){
 376             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 377             return;
 378         }
 379         if(log2w){
 380             dst[0] = av_le2ne16(*f->wordstream++);
 381             dst[1] = av_le2ne16(*f->wordstream++);
 382         }else{
 383             dst[0     ] = av_le2ne16(*f->wordstream++);
 384             dst[stride] = av_le2ne16(*f->wordstream++);
 385         }
 386     }
 387 }
 388
 /*
  * Decode a P-frame into current_picture, predicting from last_picture.
  *
  * The payload consists of three consecutive parts: a bitstream (block-type
  * codes, byte-swapped into bitstream_buffer before reading), a wordstream of
  * 16-bit values and a bytestream of motion vector indices.  The picture is
  * decoded as 8x8 blocks via decode_p_block().
  *
  * Returns 0 on success, -1 if the part sizes do not fit into length, or
  * AVERROR(ENOMEM) if the bitstream buffer cannot be allocated.  Leftover or
  * missing data after decoding is only logged.
  */
 389 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
 390     int x, y;
 391     const int width= f->avctx->width;
 392     const int height= f->avctx->height;
 393     uint16_t *src= (uint16_t*)f->last_picture.data[0];
 394     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 395     const int stride= f->current_picture.linesize[0]>>1;
 396     unsigned int bitstream_size, bytestream_size, wordstream_size, extra;
 397
 398     if(f->version>1){
         /* newer streams: 20-byte header holding the three part sizes */
 399         extra=20;
 400         if (length < extra)
 401             return -1;
 402         bitstream_size= AV_RL32(buf+8);
 403         wordstream_size= AV_RL32(buf+12);
 404         bytestream_size= AV_RL32(buf+16);
 405     }else{
         /*
          * older streams: two 16-bit sizes are read from the 4 bytes that
          * precede buf; the bytestream takes whatever remains.
          * NOTE(review): when decode_frame() passes reassembled cfrm data,
          * buf is the start of that allocation, so buf-4 lies before it --
          * confirm whether that combination can occur.
          */
 406         extra=0;
 407         bitstream_size = AV_RL16(buf-4);
 408         wordstream_size= AV_RL16(buf-2);
 409         bytestream_size= FFMAX(length - bitstream_size - wordstream_size, 0);
 410     }
 411
     /* each part must fit into what is left of the payload */
 412     if (bitstream_size > length ||
 413         bytestream_size > length - bitstream_size ||
 414         wordstream_size > length - bytestream_size - bitstream_size ||
 415         extra > length - bytestream_size - bitstream_size - wordstream_size){
 416         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
 417         bitstream_size+ bytestream_size+ wordstream_size - length);
 418         return -1;
 419     }
 420
     /* byte-swapped, zero-padded copy of the bitstream for the bit reader */
 421     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 422     if (!f->bitstream_buffer)
 423         return AVERROR(ENOMEM);
 424     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra), bitstream_size/4);
 425     memset((uint8_t*)f->bitstream_buffer + bitstream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 426     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 427
     /* the wordstream follows the bitstream, the bytestream follows the wordstream */
 428     f->wordstream= (const uint16_t*)(buf + extra + bitstream_size);
 429     f->wordstream_end= f->wordstream + wordstream_size/2;
 430     f->bytestream= buf + extra + bitstream_size + wordstream_size;
 431     f->bytestream_end = f->bytestream + bytestream_size;
 432
     /* the motion vector offsets depend on the current line size */
 433     init_mv(f);
 434
 435     for(y=0; y<height; y+=8){
 436         for(x=0; x<width; x+=8){
 437             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 438         }
 439         src += 8*stride;
 440         dst += 8*stride;
 441     }
 442
     /* report when the three readers did not end where the sizes said they would */
 443     if(   bitstream_size != (get_bits_count(&f->gb)+31)/32*4
 444        || (((const char*)f->wordstream - (const char*)buf + 2)&~2) != extra + bitstream_size + wordstream_size
 445        || (((const char*)f->bytestream - (const char*)buf + 3)&~3) != extra + bitstream_size + wordstream_size + bytestream_size)
 446         av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
 447             bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
 448             -(((const char*)f->bytestream - (const char*)buf + 3)&~3) + (extra + bitstream_size + wordstream_size + bytestream_size),
 449             -(((const char*)f->wordstream - (const char*)buf + 2)&~2) + (extra + bitstream_size + wordstream_size)
 450         );
 451
 452     return 0;
 453 }
 454
 455 /**
 456  * decode block and dequantize.
 457  * Note this is almost identical to MJPEG.
 458  */
 459 static int decode_i_block(FourXContext *f, DCTELEM *block){
 460     int code, i, j, level, val;
 461
 462     /* DC coef */
 463     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 464     if (val>>4){
 465         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 466     }
 467
 468     if(val)
 469         val = get_xbits(&f->gb, val);
 470
 471     val = val * dequant_table[0] + f->last_dc;
 472     f->last_dc =
 473     block[0] = val;
 474     /* AC coefs */
 475     i = 1;
 476     for(;;) {
 477         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 478
 479         /* EOB */
 480         if (code == 0)
 481             break;
 482         if (code == 0xf0) {
 483             i += 16;
 484         } else {
 485             level = get_xbits(&f->gb, code & 0xf);
 486             i += code >> 4;
 487             if (i >= 64) {
 488                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 489                 return 0;
 490             }
 491
 492             j= ff_zigzag_direct[i];
 493             block[j] = level * dequant_table[j];
 494             i++;
 495             if (i >= 64)
 496                 break;
 497         }
 498     }
 499
 500     return 0;
 501 }
 502
 503 static inline void idct_put(FourXContext *f, int x, int y){
 504     DCTELEM (*block)[64]= f->block;
 505     int stride= f->current_picture.linesize[0]>>1;
 506     int i;
 507     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 508
 509     for(i=0; i<4; i++){
 510         block[i][0] += 0x80*8*8;
 511         idct(block[i]);
 512     }
 513
 514     if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
 515         for(i=4; i<6; i++) idct(block[i]);
 516     }
 517
 518 /* Note transform is:
 519 y= ( 1b + 4g + 2r)/14
 520 cb=( 3b - 2g - 1r)/14
 521 cr=(-1b - 4g + 5r)/14
 522 */
 523     for(y=0; y<8; y++){
 524         for(x=0; x<8; x++){
 525             DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
 526             int cb= block[4][x + 8*y];
 527             int cr= block[5][x + 8*y];
 528             int cg= (cb + cr)>>1;
 529             int y;
 530
 531             cb+=cb;
 532
 533             y = temp[0];
 534             dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 535             y = temp[1];
 536             dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 537             y = temp[8];
 538             dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 539             y = temp[9];
 540             dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 541             dst += 2;
 542         }
 543         dst += 2*stride - 2*8;
 544     }
 545 }
 546
 547 static int decode_i_mb(FourXContext *f){
 548     int i;
 549
 550     f->dsp.clear_blocks(f->block[0]);
 551
 552     for(i=0; i<6; i++){
 553         if(decode_i_block(f, f->block[i]) < 0)
 554             return -1;
 555     }
 556
 557     return 0;
 558 }
 559
 560 static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf, int buf_size){
 561     int frequency[512];
 562     uint8_t flag[512];
 563     int up[512];
 564     uint8_t len_tab[257];
 565     int bits_tab[257];
 566     int start, end;
 567     const uint8_t *ptr= buf;
 568     const uint8_t *ptr_end = buf + buf_size;
 569     int j;
 570
 571     memset(frequency, 0, sizeof(frequency));
 572     memset(up, -1, sizeof(up));
 573
 574     start= *ptr++;
 575     end= *ptr++;
 576     for(;;){
 577         int i;
 578
 579         if (start <= end && ptr_end - ptr < end - start + 1 + 1)
 580             return NULL;
 581         for(i=start; i<=end; i++){
 582             frequency[i]= *ptr++;
 583         }
 584         start= *ptr++;
 585         if(start==0) break;
 586
 587         end= *ptr++;
 588     }
 589     frequency[256]=1;
 590
 591     while((ptr - buf)&3) ptr++; // 4byte align
 592
 593     for(j=257; j<512; j++){
 594         int min_freq[2]= {256*256, 256*256};
 595         int smallest[2]= {0, 0};
 596         int i;
 597         for(i=0; i<j; i++){
 598             if(frequency[i] == 0) continue;
 599             if(frequency[i] < min_freq[1]){
 600                 if(frequency[i] < min_freq[0]){
 601                     min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
 602                     min_freq[0]= frequency[i];smallest[0]= i;
 603                 }else{
 604                     min_freq[1]= frequency[i];smallest[1]= i;
 605                 }
 606             }
 607         }
 608         if(min_freq[1] == 256*256) break;
 609
 610         frequency[j]= min_freq[0] + min_freq[1];
 611         flag[ smallest[0] ]= 0;
 612         flag[ smallest[1] ]= 1;
 613         up[ smallest[0] ]=
 614         up[ smallest[1] ]= j;
 615         frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
 616     }
 617
 618     for(j=0; j<257; j++){
 619         int node;
 620         int len=0;
 621         int bits=0;
 622
 623         for(node= j; up[node] != -1; node= up[node]){
 624             bits += flag[node]<<len;
 625             len++;
 626             if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
 627         }
 628
 629         bits_tab[j]= bits;
 630         len_tab[j]= len;
 631     }
 632
 633     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
 634                  len_tab , 1, 1,
 635                  bits_tab, 4, 4, 0))
 636         return NULL;
 637
 638     return ptr;
 639 }
 640
 /**
  * Blend two 15-bit colors (5 bits per channel) with weights 2:1.
  *
  * Each channel is computed as 2*c0 + c1, divided by 3 with truncation, and
  * the three channels are packed again at bit positions 10, 5 and 0.
  *
  * @param c0 color weighted twice
  * @param c1 color weighted once
  * @return the blended color
  */
 static int mix(int c0, int c1){
     const int low_sum = (c0&0x001F)*2 + (c1&0x001F);
     const int mid_sum = ((c0&0x03E0)*2 + (c1&0x03E0))>>5;
     const int top_sum = (c0>>10)*2 + (c1>>10);

     return (top_sum/3)*1024 + (mid_sum/3)*32 + low_sum/3;
 }
 647
 648 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length){
 649     int x, y, x2, y2;
 650     const int width= f->avctx->width;
 651     const int height= f->avctx->height;
 652     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 653     const int stride= f->current_picture.linesize[0]>>1;
 654     const uint8_t *buf_end = buf + length;
 655
 656     for(y=0; y<height; y+=16){
 657         for(x=0; x<width; x+=16){
 658             unsigned int color[4], bits;
 659             if (buf_end - buf < 8)
 660                 return -1;
 661             memset(color, 0, sizeof(color));
 662 //warning following is purely guessed ...
 663             color[0]= bytestream_get_le16(&buf);
 664             color[1]= bytestream_get_le16(&buf);
 665
 666             if(color[0]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 667             if(color[1]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 668
 669             color[2]= mix(color[0], color[1]);
 670             color[3]= mix(color[1], color[0]);
 671
 672             bits= bytestream_get_le32(&buf);
 673             for(y2=0; y2<16; y2++){
 674                 for(x2=0; x2<16; x2++){
 675                     int index= 2*(x2>>2) + 8*(y2>>2);
 676                     dst[y2*stride+x2]= color[(bits>>index)&3];
 677                 }
 678             }
 679             dst+=16;
 680         }
 681         dst += 16*stride - width;
 682     }
 683
 684     return 0;
 685 }
 686
 687 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length){
 688     int x, y;
 689     const int width= f->avctx->width;
 690     const int height= f->avctx->height;
 691     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 692     const int stride= f->current_picture.linesize[0]>>1;
 693     const unsigned int bitstream_size= AV_RL32(buf);
 694     unsigned int prestream_size;
 695     const uint8_t *prestream;
 696
 697     if (bitstream_size > (1<<26) || length < bitstream_size + 12)
 698         return -1;
 699     prestream_size = 4*AV_RL32(buf + bitstream_size + 4);
 700     prestream = buf + bitstream_size + 12;
 701
 702     if (prestream_size > (1<<26) ||
 703         prestream_size != length - (bitstream_size + 12)){
 704         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
 705         return -1;
 706     }
 707
 708     prestream= read_huffman_tables(f, prestream, buf + length - prestream);
 709     if (!prestream)
 710         return -1;
 711
 712     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
 713
 714     prestream_size= length + buf - prestream;
 715
 716     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 717     if (!f->bitstream_buffer)
 718         return AVERROR(ENOMEM);
 719     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream, prestream_size/4);
 720     memset((uint8_t*)f->bitstream_buffer + prestream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 721     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
 722
 723     f->last_dc= 0*128*8*8;
 724
 725     for(y=0; y<height; y+=16){
 726         for(x=0; x<width; x+=16){
 727             if(decode_i_mb(f) < 0)
 728                 return -1;
 729
 730             idct_put(f, x, y);
 731         }
 732         dst += 16*stride;
 733     }
 734
 735     if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 736         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 737
 738     return 0;
 739 }
 740
 741 static int decode_frame(AVCodecContext *avctx,
 742                         void *data, int *data_size,
 743                         AVPacket *avpkt)
 744 {
 745     const uint8_t *buf = avpkt->data;
 746     int buf_size = avpkt->size;
 747     FourXContext * const f = avctx->priv_data;
 748     AVFrame *picture = data;
 749     AVFrame *p, temp;
 750     int i, frame_4cc, frame_size;
 751
 752     if (buf_size < 12)
 753         return AVERROR_INVALIDDATA;
 754     frame_4cc= AV_RL32(buf);
 755     if(buf_size != AV_RL32(buf+4)+8 || buf_size < 20){
 756         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, AV_RL32(buf+4));
 757     }
 758
 759     if(frame_4cc == AV_RL32("cfrm")){
 760         int free_index=-1;
 761         const int data_size= buf_size - 20;
 762         const int id= AV_RL32(buf+12);
 763         const int whole_size= AV_RL32(buf+16);
 764         CFrameBuffer *cfrm;
 765
 766         if (data_size < 0 || whole_size < 0){
 767             av_log(f->avctx, AV_LOG_ERROR, "sizes invalid\n");
 768             return AVERROR_INVALIDDATA;
 769         }
 770
 771         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 772             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 773                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
 774         }
 775
 776         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 777             if(f->cfrm[i].id   == id) break;
 778             if(f->cfrm[i].size == 0 ) free_index= i;
 779         }
 780
 781         if(i>=CFRAME_BUFFER_COUNT){
 782             i= free_index;
 783             f->cfrm[i].id= id;
 784         }
 785         cfrm= &f->cfrm[i];
 786
 787         if (data_size > UINT_MAX -  cfrm->size - FF_INPUT_BUFFER_PADDING_SIZE)
 788             return AVERROR_INVALIDDATA;
 789         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 790         if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
 791             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 792             return -1;
 793         }
 794
 795         memcpy(cfrm->data + cfrm->size, buf+20, data_size);
 796         cfrm->size += data_size;
 797
 798         if(cfrm->size >= whole_size){
 799             buf= cfrm->data;
 800             frame_size= cfrm->size;
 801
 802             if(id != avctx->frame_number){
 803                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
 804             }
 805
 806             cfrm->size= cfrm->id= 0;
 807             frame_4cc= AV_RL32("pfrm");
 808         }else
 809             return buf_size;
 810     }else{
 811         buf= buf + 12;
 812         frame_size= buf_size - 12;
 813     }
 814
 815     temp= f->current_picture;
 816     f->current_picture= f->last_picture;
 817     f->last_picture= temp;
 818
 819     p= &f->current_picture;
 820     avctx->coded_frame= p;
 821
 822     avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
 823
 824     p->reference= 3;
 825     if (avctx->reget_buffer(avctx, p) < 0) {
 826         av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
 827         return -1;
 828     }
 829
 830     if(frame_4cc == AV_RL32("ifr2")){
 831         p->pict_type= AV_PICTURE_TYPE_I;
 832         if(decode_i2_frame(f, buf-4, frame_size+4) < 0){
 833             av_log(f->avctx, AV_LOG_ERROR, "decode i2 frame failed\n");
 834             return -1;
 835         }
 836     }else if(frame_4cc == AV_RL32("ifrm")){
 837         p->pict_type= AV_PICTURE_TYPE_I;
 838         if(decode_i_frame(f, buf, frame_size) < 0){
 839             av_log(f->avctx, AV_LOG_ERROR, "decode i frame failed\n");
 840             return -1;
 841         }
 842     }else if(frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")){
 843         if(!f->last_picture.data[0]){
 844             f->last_picture.reference= 3;
 845             if(avctx->get_buffer(avctx, &f->last_picture) < 0){
 846                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 847                 return -1;
 848             }
 849         }
 850
 851         p->pict_type= AV_PICTURE_TYPE_P;
 852         if(decode_p_frame(f, buf, frame_size) < 0){
 853             av_log(f->avctx, AV_LOG_ERROR, "decode p frame failed\n");
 854             return -1;
 855         }
 856     }else if(frame_4cc == AV_RL32("snd_")){
 857         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
 858     }else{
 859         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
 860     }
 861
 862     p->key_frame= p->pict_type == AV_PICTURE_TYPE_I;
 863
 864     *picture= *p;
 865     *data_size = sizeof(AVPicture);
 866
 867     emms_c();
 868
 869     return buf_size;
 870 }
 871
 872
 873 static av_cold void common_init(AVCodecContext *avctx){
 874     FourXContext * const f = avctx->priv_data;
 875
 876     dsputil_init(&f->dsp, avctx);
 877
 878     f->avctx= avctx;
 879 }
 880
 881 static av_cold int decode_init(AVCodecContext *avctx){
 882     FourXContext * const f = avctx->priv_data;
 883
 884     if(avctx->extradata_size != 4 || !avctx->extradata) {
 885         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 886         return 1;
 887     }
 888     if((avctx->width % 16) || (avctx->height % 16)) {
 889         av_log(avctx, AV_LOG_ERROR, "unsupported width/height\n");
 890         return AVERROR_INVALIDDATA;
 891     }
 892
 893     avcodec_get_frame_defaults(&f->current_picture);
 894     avcodec_get_frame_defaults(&f->last_picture);
 895     f->version= AV_RL32(avctx->extradata)>>16;
 896     common_init(avctx);
 897     init_vlcs(f);
 898
 899     if(f->version>2) avctx->pix_fmt= PIX_FMT_RGB565;
 900     else             avctx->pix_fmt= PIX_FMT_BGR555;
 901
 902     return 0;
 903 }
 904
 905
 906 static av_cold int decode_end(AVCodecContext *avctx){
 907     FourXContext * const f = avctx->priv_data;
 908     int i;
 909
 910     av_freep(&f->bitstream_buffer);
 911     f->bitstream_buffer_size=0;
 912     for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 913         av_freep(&f->cfrm[i].data);
 914         f->cfrm[i].allocated_size= 0;
 915     }
 916     free_vlc(&f->pre_vlc);
 917     if(f->current_picture.data[0])
 918         avctx->release_buffer(avctx, &f->current_picture);
 919     if(f->last_picture.data[0])
 920         avctx->release_buffer(avctx, &f->last_picture);
 921
 922     return 0;
 923 }
 924
 /*
  * Codec registration entry for the 4X Movie video decoder: binds the
  * init/close/decode callbacks defined in this file and reserves a
  * FourXContext as private data.
  */
 925 AVCodec ff_fourxm_decoder = {
 926     .name           = "4xm",
 927     .type           = AVMEDIA_TYPE_VIDEO,
 928     .id             = CODEC_ID_4XM,
 929     .priv_data_size = sizeof(FourXContext),
 930     .init           = decode_init,
 931     .close          = decode_end,
 932     .decode         = decode_frame,
 933     .capabilities   = CODEC_CAP_DR1,
 934     .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
 935 };
 936