git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include <inttypes.h>
  28
  29 #include "libavutil/frame.h"
  30 #include "libavutil/imgutils.h"
  31 #include "libavutil/intreadwrite.h"
  32
  33 #include "avcodec.h"
  34 #include "bitstream.h"
  35 #include "blockdsp.h"
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "internal.h"
  39
  40 #define BLOCK_TYPE_VLC_BITS 5
  41 #define ACDC_VLC_BITS 9
  42
  43 #define CFRAME_BUFFER_COUNT 100
  44
  45 static const uint8_t block_type_tab[2][4][8][2] = {
  46     {
  47         {    // { 8, 4, 2 } x { 8, 4, 2}
  48             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  49         }, { // { 8, 4 } x 1
  50             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  51         }, { // 1 x { 8, 4 }
  52             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  53         }, { // 1 x 2, 2 x 1
  54             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  55         }
  56     }, {
  57         {   // { 8, 4, 2 } x { 8, 4, 2}
  58             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  59         }, {// { 8, 4 } x 1
  60             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  61         }, {// 1 x { 8, 4 }
  62             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  63         }, {// 1 x 2, 2 x 1
  64             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  65       }
  66     }
  67 };
  68
  69 static const uint8_t size2index[4][4] = {
  70     { -1, 3, 1, 1 },
  71     {  3, 0, 0, 0 },
  72     {  2, 0, 0, 0 },
  73     {  2, 0, 0, 0 },
  74 };
  75
  76 static const int8_t mv[256][2] = {
  77     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  78     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  79     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  80     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  81     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  82     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  83     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  84     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  85     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  86     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  87     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  88     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  89     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  90     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  91     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  92     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  93     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  94     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  95     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  96     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  97     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  98     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  99     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
 100     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
 101     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
 102     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
 103     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 104     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 105     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 106     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 107     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 108     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 109 };
 110
 111 /* This is simply the scaled down elementwise product of the standard JPEG
 112  * quantizer table and the AAN premul table. */
 113 static const uint8_t dequant_table[64] = {
 114     16, 15, 13, 19, 24, 31, 28, 17,
 115     17, 23, 25, 31, 36, 63, 45, 21,
 116     18, 24, 27, 37, 52, 59, 49, 20,
 117     16, 28, 34, 40, 60, 80, 51, 20,
 118     18, 31, 48, 66, 68, 86, 56, 21,
 119     19, 38, 56, 59, 64, 64, 48, 20,
 120     27, 48, 55, 55, 56, 51, 35, 15,
 121     20, 35, 34, 32, 31, 22, 15,  8,
 122 };
 123
 /* VLC decoders for the P-frame block types, laid out as [table set][size class]
  * to mirror the first two dimensions of block_type_tab. init_vlcs() fills them
  * in; decode_p_block() reads them. */
 124 static VLC block_type_vlc[2][4];
 125
 126
 127 typedef struct CFrameBuffer {
 128     unsigned int allocated_size;
 129     unsigned int size;
 130     int id;
 131     uint8_t *data;
 132 } CFrameBuffer;
 133
 134 typedef struct FourXContext {
 135     AVCodecContext *avctx;
 136     BlockDSPContext bdsp;
 137     BswapDSPContext bbdsp;
 138     uint16_t *frame_buffer;
 139     uint16_t *last_frame_buffer;
 140     BitstreamContext pre_bc;    // ac/dc prefix
 141     BitstreamContext bc;
 142     GetByteContext g;
 143     GetByteContext g2;
 144     int mv[256];
 145     VLC pre_vlc;
 146     int last_dc;
 147     DECLARE_ALIGNED(16, int16_t, block)[6][64];
 148     void *bitstream_buffer;
 149     unsigned int bitstream_buffer_size;
 150     int version;
 151     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 152 } FourXContext;
 153
 154
 155 #define FIX_1_082392200  70936
 156 #define FIX_1_414213562  92682
 157 #define FIX_1_847759065 121095
 158 #define FIX_2_613125930 171254
 159
 160 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 161
 162 static void idct(int16_t block[64])
 163 {
 164     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 165     int tmp10, tmp11, tmp12, tmp13;
 166     int z5, z10, z11, z12, z13;
 167     int i;
 168     int temp[64];
 169
 170     for (i = 0; i < 8; i++) {
 171         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 172         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 173
 174         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 175         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 176
 177         tmp0 = tmp10 + tmp13;
 178         tmp3 = tmp10 - tmp13;
 179         tmp1 = tmp11 + tmp12;
 180         tmp2 = tmp11 - tmp12;
 181
 182         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 183         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 184         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 185         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 186
 187         tmp7  =          z11 + z13;
 188         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 189
 190         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 191         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 192         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 193
 194         tmp6 = tmp12 - tmp7;
 195         tmp5 = tmp11 - tmp6;
 196         tmp4 = tmp10 + tmp5;
 197
 198         temp[8 * 0 + i] = tmp0 + tmp7;
 199         temp[8 * 7 + i] = tmp0 - tmp7;
 200         temp[8 * 1 + i] = tmp1 + tmp6;
 201         temp[8 * 6 + i] = tmp1 - tmp6;
 202         temp[8 * 2 + i] = tmp2 + tmp5;
 203         temp[8 * 5 + i] = tmp2 - tmp5;
 204         temp[8 * 4 + i] = tmp3 + tmp4;
 205         temp[8 * 3 + i] = tmp3 - tmp4;
 206     }
 207
 208     for (i = 0; i < 8 * 8; i += 8) {
 209         tmp10 = temp[0 + i] + temp[4 + i];
 210         tmp11 = temp[0 + i] - temp[4 + i];
 211
 212         tmp13 = temp[2 + i] + temp[6 + i];
 213         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 214
 215         tmp0 = tmp10 + tmp13;
 216         tmp3 = tmp10 - tmp13;
 217         tmp1 = tmp11 + tmp12;
 218         tmp2 = tmp11 - tmp12;
 219
 220         z13 = temp[5 + i] + temp[3 + i];
 221         z10 = temp[5 + i] - temp[3 + i];
 222         z11 = temp[1 + i] + temp[7 + i];
 223         z12 = temp[1 + i] - temp[7 + i];
 224
 225         tmp7  = z11 + z13;
 226         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 227
 228         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 229         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 230         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 231
 232         tmp6 = tmp12 - tmp7;
 233         tmp5 = tmp11 - tmp6;
 234         tmp4 = tmp10 + tmp5;
 235
 236         block[0 + i] = (tmp0 + tmp7) >> 6;
 237         block[7 + i] = (tmp0 - tmp7) >> 6;
 238         block[1 + i] = (tmp1 + tmp6) >> 6;
 239         block[6 + i] = (tmp1 - tmp6) >> 6;
 240         block[2 + i] = (tmp2 + tmp5) >> 6;
 241         block[5 + i] = (tmp2 - tmp5) >> 6;
 242         block[4 + i] = (tmp3 + tmp4) >> 6;
 243         block[3 + i] = (tmp3 - tmp4) >> 6;
 244     }
 245 }
 246
 247 static av_cold void init_vlcs(FourXContext *f)
 248 {
 249     static VLC_TYPE table[2][4][32][2];
 250     int i, j;
 251
 252     for (i = 0; i < 2; i++) {
 253         for (j = 0; j < 4; j++) {
 254             block_type_vlc[i][j].table           = table[i][j];
 255             block_type_vlc[i][j].table_allocated = 32;
 256             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 257                      &block_type_tab[i][j][0][1], 2, 1,
 258                      &block_type_tab[i][j][0][0], 2, 1,
 259                      INIT_VLC_USE_NEW_STATIC);
 260         }
 261     }
 262 }
 263
 264 static void init_mv(FourXContext *f, int linesize)
 265 {
 266     int i;
 267
 268     for (i = 0; i < 256; i++) {
 269         if (f->version > 1)
 270             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 271         else
 272             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 273     }
 274 }
 275
 276 #if HAVE_BIGENDIAN
 277 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 278     {                                                   \
 279         unsigned tmpval = AV_RN32(src);                 \
 280         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 281         tmpval = tmpval * (scale) + (dc);               \
 282         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 283         AV_WN32A(dst, tmpval);                          \
 284     }
 285 #else
 286 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 287     {                                                    \
 288         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 289         AV_WN32A(dst, tmpval);                           \
 290     }
 291 #endif
 292
 293 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 294                         int h, int stride, int scale, unsigned dc)
 295 {
 296     int i;
 297     dc *= 0x10001;
 298
 299     switch (log2w) {
 300     case 0:
 301         for (i = 0; i < h; i++) {
 302             dst[0] = scale * src[0] + dc;
 303             if (scale)
 304                 src += stride;
 305             dst += stride;
 306         }
 307         break;
 308     case 1:
 309         for (i = 0; i < h; i++) {
 310             LE_CENTRIC_MUL(dst, src, scale, dc);
 311             if (scale)
 312                 src += stride;
 313             dst += stride;
 314         }
 315         break;
 316     case 2:
 317         for (i = 0; i < h; i++) {
 318             LE_CENTRIC_MUL(dst, src, scale, dc);
 319             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 320             if (scale)
 321                 src += stride;
 322             dst += stride;
 323         }
 324         break;
 325     case 3:
 326         for (i = 0; i < h; i++) {
 327             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 328             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 329             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 330             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 331             if (scale)
 332                 src += stride;
 333             dst += stride;
 334         }
 335         break;
 336     default:
 337         break;
 338     }
 339 }
 340
 341 static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 342                           int log2w, int log2h, int stride)
 343 {
 344     int index, h, code, ret, scale = 1;
 345     uint16_t *start, *end;
 346     unsigned dc = 0;
 347
 348     if (log2h < 0 || log2w < 0)
 349         return AVERROR_INVALIDDATA;
 350
 351     index = size2index[log2h][log2w];
 352     if (index < 0)
 353         return AVERROR_INVALIDDATA;
 354
 355     h     = 1 << log2h;
 356     code  = bitstream_read_vlc(&f->bc, block_type_vlc[1 - (f->version > 1)][index].table,
 357                                BLOCK_TYPE_VLC_BITS, 1);
 358     if (code < 0 || code > 6)
 359         return AVERROR_INVALIDDATA;
 360
 361     start = f->last_frame_buffer;
 362     end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 363
 364     if (code == 1) {
 365         if (--log2h < 0)
 366             return AVERROR_INVALIDDATA;
 367         if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
 368             return ret;
 369         return decode_p_block(f, dst + (stride << log2h),
 370                               src + (stride << log2h),
 371                               log2w, log2h, stride);
 372     } else if (code == 2) {
 373         log2w--;
 374         if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
 375             return ret;
 376         return decode_p_block(f, dst + (1 << log2w),
 377                               src + (1 << log2w),
 378                               log2w, log2h, stride);
 379     } else if (code == 6) {
 380         if (log2w) {
 381             dst[0]      = bytestream2_get_le16(&f->g2);
 382             dst[1]      = bytestream2_get_le16(&f->g2);
 383         } else {
 384             dst[0]      = bytestream2_get_le16(&f->g2);
 385             dst[stride] = bytestream2_get_le16(&f->g2);
 386         }
 387         return 0;
 388     }
 389
 390     if (code == 0) {
 391         src  += f->mv[bytestream2_get_byte(&f->g)];
 392     } else if (code == 3 && f->version >= 2) {
 393         return 0;
 394     } else if (code == 4) {
 395         src  += f->mv[bytestream2_get_byte(&f->g)];
 396         dc    = bytestream2_get_le16(&f->g2);
 397     } else if (code == 5) {
 398         scale = 0;
 399         dc    = bytestream2_get_le16(&f->g2);
 400     }
 401
 402     if (start > src || src > end) {
 403         av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 404         return AVERROR_INVALIDDATA;
 405     }
 406
 407     mcdc(dst, src, log2w, h, stride, scale, dc);
 408
 409     return 0;
 410 }
 411
 412 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 413 {
 414     int x, y;
 415     const int width  = f->avctx->width;
 416     const int height = f->avctx->height;
 417     uint16_t *dst    = f->frame_buffer;
 418     uint16_t *src;
 419     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 420                  bytestream_offset, wordstream_offset;
 421     int ret;
 422
 423     src = f->last_frame_buffer;
 424
 425     if (f->version > 1) {
 426         if (length < 20)
 427             return AVERROR_INVALIDDATA;
 428         extra           = 20;
 429         bitstream_size  = AV_RL32(buf + 8);
 430         wordstream_size = AV_RL32(buf + 12);
 431         bytestream_size = AV_RL32(buf + 16);
 432     } else {
 433         extra           = 0;
 434         bitstream_size  = AV_RL16(buf - 4);
 435         wordstream_size = AV_RL16(buf - 2);
 436         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 437     }
 438
 439     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 440         || bitstream_size  > (1 << 26)
 441         || bytestream_size > (1 << 26)
 442         || wordstream_size > (1 << 26)) {
 443         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 444                bitstream_size, bytestream_size, wordstream_size,
 445                bitstream_size + bytestream_size + wordstream_size - length);
 446         return AVERROR_INVALIDDATA;
 447     }
 448
 449     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 450                    bitstream_size + AV_INPUT_BUFFER_PADDING_SIZE);
 451     if (!f->bitstream_buffer)
 452         return AVERROR(ENOMEM);
 453     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
 454                        bitstream_size / 4);
 455     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 456            0, AV_INPUT_BUFFER_PADDING_SIZE);
 457     bitstream_init(&f->bc, f->bitstream_buffer, 8 * bitstream_size);
 458
 459     wordstream_offset = extra + bitstream_size;
 460     bytestream_offset = extra + bitstream_size + wordstream_size;
 461     bytestream2_init(&f->g2, buf + wordstream_offset,
 462                      length - wordstream_offset);
 463     bytestream2_init(&f->g, buf + bytestream_offset,
 464                      length - bytestream_offset);
 465
 466     init_mv(f, width * 2);
 467
 468     for (y = 0; y < height; y += 8) {
 469         for (x = 0; x < width; x += 8)
 470             if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, width)) < 0)
 471                 return ret;
 472         src += 8 * width;
 473         dst += 8 * width;
 474     }
 475
 476     return 0;
 477 }
 478
 479 /**
 480  * decode block and dequantize.
 481  * Note this is almost identical to MJPEG.
 482  */
 483 static int decode_i_block(FourXContext *f, int16_t *block)
 484 {
 485     int code, i, j, level, val;
 486
 487     /* DC coef */
 488     val = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 489     if (val >> 4)
 490         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 491
 492     if (val)
 493         val = bitstream_read_xbits(&f->bc, val);
 494
 495     val        = val * dequant_table[0] + f->last_dc;
 496     f->last_dc = block[0] = val;
 497     /* AC coefs */
 498     i = 1;
 499     for (;;) {
 500         code = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 501
 502         /* EOB */
 503         if (code == 0)
 504             break;
 505         if (code == 0xf0) {
 506             i += 16;
 507         } else {
 508             level = bitstream_read_xbits(&f->bc, code & 0xf);
 509             i    += code >> 4;
 510             if (i >= 64) {
 511                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 512                 return 0;
 513             }
 514
 515             j = ff_zigzag_direct[i];
 516             block[j] = level * dequant_table[j];
 517             i++;
 518             if (i >= 64)
 519                 break;
 520         }
 521     }
 522
 523     return 0;
 524 }
 525
 526 static inline void idct_put(FourXContext *f, int x, int y)
 527 {
 528     int16_t (*block)[64] = f->block;
 529     int stride           = f->avctx->width;
 530     int i;
 531     uint16_t *dst = f->frame_buffer + y * stride + x;
 532
 533     for (i = 0; i < 4; i++) {
 534         block[i][0] += 0x80 * 8 * 8;
 535         idct(block[i]);
 536     }
 537
 538     if (!(f->avctx->flags & AV_CODEC_FLAG_GRAY)) {
 539         for (i = 4; i < 6; i++)
 540             idct(block[i]);
 541     }
 542
 543     /* Note transform is:
 544      * y  = ( 1b + 4g + 2r) / 14
 545      * cb = ( 3b - 2g - 1r) / 14
 546      * cr = (-1b - 4g + 5r) / 14 */
 547     for (y = 0; y < 8; y++) {
 548         for (x = 0; x < 8; x++) {
 549             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 550                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 551             int cb = block[4][x + 8 * y];
 552             int cr = block[5][x + 8 * y];
 553             int cg = (cb + cr) >> 1;
 554             int y;
 555
 556             cb += cb;
 557
 558             y               = temp[0];
 559             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 560             y               = temp[1];
 561             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 562             y               = temp[8];
 563             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 564             y               = temp[9];
 565             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 566             dst            += 2;
 567         }
 568         dst += 2 * stride - 2 * 8;
 569     }
 570 }
 571
 572 static int decode_i_mb(FourXContext *f)
 573 {
 574     int ret;
 575     int i;
 576
 577     f->bdsp.clear_blocks(f->block[0]);
 578
 579     for (i = 0; i < 6; i++)
 580         if ((ret = decode_i_block(f, f->block[i])) < 0)
 581             return ret;
 582
 583     return 0;
 584 }
 585
 586 static const uint8_t *read_huffman_tables(FourXContext *f,
 587                                           const uint8_t * const buf,
 588                                           int len)
 589 {
 590     int frequency[512] = { 0 };
 591     uint8_t flag[512];
 592     int up[512];
 593     uint8_t len_tab[257];
 594     int bits_tab[257];
 595     int start, end;
 596     const uint8_t *ptr = buf;
 597     int j;
 598
 599     memset(up, -1, sizeof(up));
 600
 601     start = *ptr++;
 602     end   = *ptr++;
 603     for (;;) {
 604         int i;
 605
 606         len -= end - start + 1;
 607
 608         if (end < start || len < 0)
 609             return NULL;
 610
 611         for (i = start; i <= end; i++)
 612             frequency[i] = *ptr++;
 613         start = *ptr++;
 614         if (start == 0)
 615             break;
 616
 617         if (--len < 0)
 618             return NULL;
 619
 620         end = *ptr++;
 621     }
 622     frequency[256] = 1;
 623
 624     while ((ptr - buf) & 3)
 625         ptr++; // 4byte align
 626
 627     for (j = 257; j < 512; j++) {
 628         int min_freq[2] = { 256 * 256, 256 * 256 };
 629         int smallest[2] = { 0, 0 };
 630         int i;
 631         for (i = 0; i < j; i++) {
 632             if (frequency[i] == 0)
 633                 continue;
 634             if (frequency[i] < min_freq[1]) {
 635                 if (frequency[i] < min_freq[0]) {
 636                     min_freq[1] = min_freq[0];
 637                     smallest[1] = smallest[0];
 638                     min_freq[0] = frequency[i];
 639                     smallest[0] = i;
 640                 } else {
 641                     min_freq[1] = frequency[i];
 642                     smallest[1] = i;
 643                 }
 644             }
 645         }
 646         if (min_freq[1] == 256 * 256)
 647             break;
 648
 649         frequency[j]           = min_freq[0] + min_freq[1];
 650         flag[smallest[0]]      = 0;
 651         flag[smallest[1]]      = 1;
 652         up[smallest[0]]        =
 653         up[smallest[1]]        = j;
 654         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 655     }
 656
 657     for (j = 0; j < 257; j++) {
 658         int node, len = 0, bits = 0;
 659
 660         for (node = j; up[node] != -1; node = up[node]) {
 661             bits += flag[node] << len;
 662             len++;
 663             if (len > 31)
 664                 // can this happen at all ?
 665                 av_log(f->avctx, AV_LOG_ERROR,
 666                        "vlc length overflow\n");
 667         }
 668
 669         bits_tab[j] = bits;
 670         len_tab[j]  = len;
 671     }
 672
 673     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 674                  bits_tab, 4, 4, 0))
 675         return NULL;
 676
 677     return ptr;
 678 }
 679
 680 static int mix(int c0, int c1)
 681 {
 682     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 683     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 684     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 685     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 686 }
 687
 688 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 689 {
 690     int x, y, x2, y2;
 691     const int width  = f->avctx->width;
 692     const int height = f->avctx->height;
 693     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 694     uint16_t *dst    = f->frame_buffer;
 695     GetByteContext g3;
 696
 697     if (length < mbs * 8) {
 698         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 699         return AVERROR_INVALIDDATA;
 700     }
 701     bytestream2_init(&g3, buf, length);
 702
 703     for (y = 0; y < height; y += 16) {
 704         for (x = 0; x < width; x += 16) {
 705             unsigned int color[4] = { 0 }, bits;
 706             // warning following is purely guessed ...
 707             color[0] = bytestream2_get_le16u(&g3);
 708             color[1] = bytestream2_get_le16u(&g3);
 709
 710             if (color[0] & 0x8000)
 711                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
 712             if (color[1] & 0x8000)
 713                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
 714
 715             color[2] = mix(color[0], color[1]);
 716             color[3] = mix(color[1], color[0]);
 717
 718             bits = bytestream2_get_le32u(&g3);
 719             for (y2 = 0; y2 < 16; y2++) {
 720                 for (x2 = 0; x2 < 16; x2++) {
 721                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 722                     dst[y2 * width + x2] = color[(bits >> index) & 3];
 723                 }
 724             }
 725             dst += 16;
 726         }
 727         dst += 16 * width - x;
 728     }
 729
 730     return 0;
 731 }
 732
 733 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 734 {
 735     int x, y, ret;
 736     const int width  = f->avctx->width;
 737     const int height = f->avctx->height;
 738     const unsigned int bitstream_size = AV_RL32(buf);
 739     int token_count av_unused;
 740     unsigned int prestream_size;
 741     const uint8_t *prestream;
 742
 743     if (bitstream_size > (1 << 26))
 744         return AVERROR_INVALIDDATA;
 745
 746     if (length < bitstream_size + 12) {
 747         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 748         return AVERROR_INVALIDDATA;
 749     }
 750
 751     token_count    =     AV_RL32(buf + bitstream_size + 8);
 752     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 753     prestream      =             buf + bitstream_size + 12;
 754
 755     if (prestream_size + bitstream_size + 12 != length
 756         || prestream_size > (1 << 26)) {
 757         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 758                prestream_size, bitstream_size, length);
 759         return AVERROR_INVALIDDATA;
 760     }
 761
 762     prestream = read_huffman_tables(f, prestream, prestream_size);
 763     if (!prestream) {
 764         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 765         return AVERROR_INVALIDDATA;
 766     }
 767
 768     bitstream_init(&f->bc, buf + 4, 8 * bitstream_size);
 769
 770     prestream_size = length + buf - prestream;
 771
 772     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 773                    prestream_size + AV_INPUT_BUFFER_PADDING_SIZE);
 774     if (!f->bitstream_buffer)
 775         return AVERROR(ENOMEM);
 776     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
 777                        prestream_size / 4);
 778     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 779            0, AV_INPUT_BUFFER_PADDING_SIZE);
 780     bitstream_init(&f->pre_bc, f->bitstream_buffer, 8 * prestream_size);
 781
 782     f->last_dc = 0 * 128 * 8 * 8;
 783
 784     for (y = 0; y < height; y += 16) {
 785         for (x = 0; x < width; x += 16) {
 786             if ((ret = decode_i_mb(f)) < 0)
 787                 return ret;
 788
 789             idct_put(f, x, y);
 790         }
 791     }
 792
 793     if (bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 794         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 795
 796     return 0;
 797 }
 798
 799 static int decode_frame(AVCodecContext *avctx, void *data,
 800                         int *got_frame, AVPacket *avpkt)
 801 {
 802     const uint8_t *buf    = avpkt->data;
 803     int buf_size          = avpkt->size;
 804     FourXContext *const f = avctx->priv_data;
 805     AVFrame *picture      = data;
 806     int i, frame_4cc, frame_size, ret;
 807
 808     if (buf_size < 20)
 809         return AVERROR_INVALIDDATA;
 810
 811     if (avctx->width % 16 || avctx->height % 16) {
 812         av_log(avctx, AV_LOG_ERROR,
 813                "Dimensions non-multiple of 16 are invalid.\n");
 814         return AVERROR_INVALIDDATA;
 815     }
 816
 817     if (buf_size < AV_RL32(buf + 4) + 8) {
 818         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %"PRIu32"\n",
 819                buf_size, AV_RL32(buf + 4));
 820         return AVERROR_INVALIDDATA;
 821     }
 822
 823     frame_4cc = AV_RL32(buf);
 824
 825     if (frame_4cc == AV_RL32("cfrm")) {
 826         int free_index       = -1;
 827         int id, whole_size;
 828         const int data_size  = buf_size - 20;
 829         CFrameBuffer *cfrm;
 830
 831         id         = AV_RL32(buf + 12);
 832         whole_size = AV_RL32(buf + 16);
 833
 834         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 835             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 836                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 837                        f->cfrm[i].id);
 838
 839         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 840             if (f->cfrm[i].id == id)
 841                 break;
 842             if (f->cfrm[i].size == 0)
 843                 free_index = i;
 844         }
 845
 846         if (i >= CFRAME_BUFFER_COUNT) {
 847             i             = free_index;
 848             f->cfrm[i].id = id;
 849         }
 850         cfrm = &f->cfrm[i];
 851
 852         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 853                                      cfrm->size + data_size + AV_INPUT_BUFFER_PADDING_SIZE);
 854         // explicit check needed as memcpy below might not catch a NULL
 855         if (!cfrm->data) {
 856             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 857             return AVERROR(ENOMEM);
 858         }
 859
 860         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 861         cfrm->size += data_size;
 862
 863         if (cfrm->size >= whole_size) {
 864             buf        = cfrm->data;
 865             frame_size = cfrm->size;
 866
 867             if (id != avctx->frame_number)
 868                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 869                        id, avctx->frame_number);
 870
 871             if (f->version <= 1)
 872                 return AVERROR_INVALIDDATA;
 873
 874             cfrm->size = cfrm->id = 0;
 875             frame_4cc  = AV_RL32("pfrm");
 876         } else
 877             return buf_size;
 878     } else {
 879         buf        = buf      + 12;
 880         frame_size = buf_size - 12;
 881     }
 882
 883
 884     if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
 885         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 886         return ret;
 887     }
 888
 889     if (frame_4cc == AV_RL32("ifr2")) {
 890         picture->pict_type = AV_PICTURE_TYPE_I;
 891         if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0)
 892             return ret;
 893     } else if (frame_4cc == AV_RL32("ifrm")) {
 894         picture->pict_type = AV_PICTURE_TYPE_I;
 895         if ((ret = decode_i_frame(f, buf, frame_size)) < 0)
 896             return ret;
 897     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 898         picture->pict_type = AV_PICTURE_TYPE_P;
 899         if ((ret = decode_p_frame(f, buf, frame_size)) < 0)
 900             return ret;
 901     } else if (frame_4cc == AV_RL32("snd_")) {
 902         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 903                buf_size);
 904     } else {
 905         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 906                buf_size);
 907     }
 908
 909     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 910
 911     av_image_copy_plane(picture->data[0], picture->linesize[0],
 912                         (const uint8_t*)f->frame_buffer,  avctx->width * 2,
 913                         avctx->width * 2, avctx->height);
 914     FFSWAP(uint16_t *, f->frame_buffer, f->last_frame_buffer);
 915
 916     *got_frame = 1;
 917
 918     emms_c();
 919
 920     return buf_size;
 921 }
 922
 923 static av_cold int decode_end(AVCodecContext *avctx)
 924 {
 925     FourXContext * const f = avctx->priv_data;
 926     int i;
 927
 928     av_freep(&f->frame_buffer);
 929     av_freep(&f->last_frame_buffer);
 930     av_freep(&f->bitstream_buffer);
 931     f->bitstream_buffer_size = 0;
 932     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 933         av_freep(&f->cfrm[i].data);
 934         f->cfrm[i].allocated_size = 0;
 935     }
 936     ff_free_vlc(&f->pre_vlc);
 937
 938     return 0;
 939 }
 940
 941 static av_cold int decode_init(AVCodecContext *avctx)
 942 {
 943     FourXContext * const f = avctx->priv_data;
 944     int ret;
 945
 946     if (avctx->extradata_size != 4 || !avctx->extradata) {
 947         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 948         return AVERROR_INVALIDDATA;
 949     }
 950
 951     ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
 952     if (ret < 0)
 953         return ret;
 954
 955     f->frame_buffer      = av_mallocz(avctx->width * avctx->height * 2);
 956     f->last_frame_buffer = av_mallocz(avctx->width * avctx->height * 2);
 957     if (!f->frame_buffer || !f->last_frame_buffer) {
 958         decode_end(avctx);
 959         return AVERROR(ENOMEM);
 960     }
 961
 962     f->version = AV_RL32(avctx->extradata) >> 16;
 963     ff_blockdsp_init(&f->bdsp, avctx);
 964     ff_bswapdsp_init(&f->bbdsp);
 965     f->avctx = avctx;
 966     init_vlcs(f);
 967
 968     if (f->version > 2)
 969         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 970     else
 971         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 972
 973     return 0;
 974 }
 975
 976 AVCodec ff_fourxm_decoder = {
 977     .name           = "4xm",
 978     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 979     .type           = AVMEDIA_TYPE_VIDEO,
 980     .id             = AV_CODEC_ID_4XM,
 981     .priv_data_size = sizeof(FourXContext),
 982     .init           = decode_init,
 983     .close          = decode_end,
 984     .decode         = decode_frame,
 985     .capabilities   = AV_CODEC_CAP_DR1,
 986 };