git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include <inttypes.h>
  28
  29 #include "libavutil/frame.h"
  30 #include "libavutil/imgutils.h"
  31 #include "libavutil/intreadwrite.h"
  32
  33 #include "avcodec.h"
  34 #include "bitstream.h"
  35 #include "blockdsp.h"
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "internal.h"
  39 #include "vlc.h"
  40
/* Bit width passed to init_vlc()/bitstream_read_vlc() for the block type VLCs. */
#define BLOCK_TYPE_VLC_BITS 5
/* Bit width passed to init_vlc()/bitstream_read_vlc() for the AC/DC prefix VLC. */
#define ACDC_VLC_BITS 9

/* Number of CFrameBuffer slots held in FourXContext.cfrm[]. */
#define CFRAME_BUFFER_COUNT 100
  45
/*
 * Descriptions of the block type VLCs, consumed by init_vlcs().
 * Indexed as [table set][size class][symbol][0 = code, 1 = code length].
 * decode_p_block() picks the table set from the stream version and the size
 * class from size2index[][]. init_vlcs() registers seven symbols per table;
 * entries with length 0 appear to mark symbols that do not exist for that
 * size class (NOTE(review): confirm against init_vlc() semantics).
 */
static const uint8_t block_type_tab[2][4][8][2] = {
    {
        {    // { 8, 4, 2 } x { 8, 4, 2}
            { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
        }, { // { 8, 4 } x 1
            { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x { 8, 4 }
            { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x 2, 2 x 1
            { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
        }
    }, {
        {   // { 8, 4, 2 } x { 8, 4, 2}
            { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// { 8, 4 } x 1
            { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x { 8, 4 }
            { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x 2, 2 x 1
            { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
      }
    }
};
  69
  70 static const uint8_t size2index[4][4] = {
  71     { -1, 3, 1, 1 },
  72     {  3, 0, 0, 0 },
  73     {  2, 0, 0, 0 },
  74     {  2, 0, 0, 0 },
  75 };
  76
/*
 * Motion vector table used by init_mv() when the stream version is > 1.
 * init_mv() adds element [0] of an entry directly to the offset and scales
 * element [1] by the line size, i.e. each entry is an { x, y } displacement.
 */
static const int8_t mv[256][2] = {
    {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
    {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
    {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
    {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
    {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
    {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
    {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
    {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
    {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
    {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
    {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
    {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
    {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
    {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
    {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
    {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
    {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
    {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
    {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
    {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
    { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
    {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
    {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
    {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
    {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
    {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
    {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
    {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
    {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
    { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
    { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
    { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
};
 111
/* This is simply the scaled down elementwise product of the standard JPEG
 * quantizer table and the AAN premul table.
 * decode_i_block() multiplies the DC difference by entry 0 and every AC level
 * by the entry at its de-zigzagged position (ff_zigzag_direct[i]). */
static const uint8_t dequant_table[64] = {
    16, 15, 13, 19, 24, 31, 28, 17,
    17, 23, 25, 31, 36, 63, 45, 21,
    18, 24, 27, 37, 52, 59, 49, 20,
    16, 28, 34, 40, 60, 80, 51, 20,
    18, 31, 48, 66, 68, 86, 56, 21,
    19, 38, 56, 59, 64, 64, 48, 20,
    27, 48, 55, 55, 56, 51, 35, 15,
    20, 35, 34, 32, 31, 22, 15,  8,
};
 124
/* Block type VLCs, indexed like block_type_tab[][]; set up by init_vlcs(). */
static VLC block_type_vlc[2][4];
 126
 127
/* Accumulation slot for one frame that arrives split over several "cfrm"
 * packets (see decode_frame()). */
typedef struct CFrameBuffer {
    unsigned int allocated_size; // capacity of data, maintained by av_fast_realloc()
    unsigned int size;           // payload bytes collected so far; 0 marks a free slot
    int id;                      // frame id read from the packet header
    uint8_t *data;               // collected payload
} CFrameBuffer;
 134
/* Private decoder state (avctx->priv_data). */
typedef struct FourXContext {
    AVCodecContext *avctx;      // owning codec context (dimensions, flags, logging)
    BlockDSPContext bdsp;       // provides clear_blocks()
    BswapDSPContext bbdsp;      // provides bswap_buf()
    uint16_t *frame_buffer;      // frame being decoded, one uint16_t per pixel
    uint16_t *last_frame_buffer; // previous frame, source for P-frame blocks
    BitstreamContext pre_bc;    // ac/dc prefix
    BitstreamContext bc;        // main bitstream (block types / coefficient bits)
    GetByteContext g;           // P-frame byte stream (motion vector indices)
    GetByteContext g2;          // P-frame word stream (16-bit values)
    int mv[256];                // per-index source offsets, filled by init_mv()
    VLC pre_vlc;                // AC/DC prefix VLC built by read_huffman_tables()
    int last_dc;                // DC predictor used by decode_i_block()
    DECLARE_ALIGNED(16, int16_t, block)[6][64]; // coefficient blocks of one macroblock
    void *bitstream_buffer;     // byte-swapped copy of the current bitstream
    unsigned int bitstream_buffer_size; // capacity of bitstream_buffer (av_fast_malloc)
    int version;                // stream version; selects between format variants
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; // slots for frames split across packets
} FourXContext;
 154
 155
 156 #define FIX_1_082392200  70936
 157 #define FIX_1_414213562  92682
 158 #define FIX_1_847759065 121095
 159 #define FIX_2_613125930 171254
 160
 161 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 162
 163 static void idct(int16_t block[64])
 164 {
 165     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 166     int tmp10, tmp11, tmp12, tmp13;
 167     int z5, z10, z11, z12, z13;
 168     int i;
 169     int temp[64];
 170
 171     for (i = 0; i < 8; i++) {
 172         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 173         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 174
 175         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 176         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 177
 178         tmp0 = tmp10 + tmp13;
 179         tmp3 = tmp10 - tmp13;
 180         tmp1 = tmp11 + tmp12;
 181         tmp2 = tmp11 - tmp12;
 182
 183         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 184         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 185         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 186         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 187
 188         tmp7  =          z11 + z13;
 189         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 190
 191         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 192         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 193         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 194
 195         tmp6 = tmp12 - tmp7;
 196         tmp5 = tmp11 - tmp6;
 197         tmp4 = tmp10 + tmp5;
 198
 199         temp[8 * 0 + i] = tmp0 + tmp7;
 200         temp[8 * 7 + i] = tmp0 - tmp7;
 201         temp[8 * 1 + i] = tmp1 + tmp6;
 202         temp[8 * 6 + i] = tmp1 - tmp6;
 203         temp[8 * 2 + i] = tmp2 + tmp5;
 204         temp[8 * 5 + i] = tmp2 - tmp5;
 205         temp[8 * 4 + i] = tmp3 + tmp4;
 206         temp[8 * 3 + i] = tmp3 - tmp4;
 207     }
 208
 209     for (i = 0; i < 8 * 8; i += 8) {
 210         tmp10 = temp[0 + i] + temp[4 + i];
 211         tmp11 = temp[0 + i] - temp[4 + i];
 212
 213         tmp13 = temp[2 + i] + temp[6 + i];
 214         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 215
 216         tmp0 = tmp10 + tmp13;
 217         tmp3 = tmp10 - tmp13;
 218         tmp1 = tmp11 + tmp12;
 219         tmp2 = tmp11 - tmp12;
 220
 221         z13 = temp[5 + i] + temp[3 + i];
 222         z10 = temp[5 + i] - temp[3 + i];
 223         z11 = temp[1 + i] + temp[7 + i];
 224         z12 = temp[1 + i] - temp[7 + i];
 225
 226         tmp7  = z11 + z13;
 227         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 228
 229         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 230         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 231         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 232
 233         tmp6 = tmp12 - tmp7;
 234         tmp5 = tmp11 - tmp6;
 235         tmp4 = tmp10 + tmp5;
 236
 237         block[0 + i] = (tmp0 + tmp7) >> 6;
 238         block[7 + i] = (tmp0 - tmp7) >> 6;
 239         block[1 + i] = (tmp1 + tmp6) >> 6;
 240         block[6 + i] = (tmp1 - tmp6) >> 6;
 241         block[2 + i] = (tmp2 + tmp5) >> 6;
 242         block[5 + i] = (tmp2 - tmp5) >> 6;
 243         block[4 + i] = (tmp3 + tmp4) >> 6;
 244         block[3 + i] = (tmp3 - tmp4) >> 6;
 245     }
 246 }
 247
 248 static av_cold void init_vlcs(FourXContext *f)
 249 {
 250     static VLC_TYPE table[2][4][32][2];
 251     int i, j;
 252
 253     for (i = 0; i < 2; i++) {
 254         for (j = 0; j < 4; j++) {
 255             block_type_vlc[i][j].table           = table[i][j];
 256             block_type_vlc[i][j].table_allocated = 32;
 257             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 258                      &block_type_tab[i][j][0][1], 2, 1,
 259                      &block_type_tab[i][j][0][0], 2, 1,
 260                      INIT_VLC_USE_NEW_STATIC);
 261         }
 262     }
 263 }
 264
 265 static void init_mv(FourXContext *f, int linesize)
 266 {
 267     int i;
 268
 269     for (i = 0; i < 256; i++) {
 270         if (f->version > 1)
 271             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 272         else
 273             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 274     }
 275 }
 276
/*
 * LE_CENTRIC_MUL(dst, src, scale, dc): handle two neighbouring 16-bit values
 * with one 32-bit operation, dst32 = src32 * scale + dc.
 * src is read with AV_RN32() and dst is written with AV_WN32A().
 * The big-endian variant swaps the two 16-bit halves before and after the
 * arithmetic, presumably so that the arithmetic sees the halves in the same
 * order as on little-endian hosts (NOTE(review): confirm).
 */
#if HAVE_BIGENDIAN
#define LE_CENTRIC_MUL(dst, src, scale, dc)             \
    {                                                   \
        unsigned tmpval = AV_RN32(src);                 \
        tmpval = (tmpval << 16) | (tmpval >> 16);       \
        tmpval = tmpval * (scale) + (dc);               \
        tmpval = (tmpval << 16) | (tmpval >> 16);       \
        AV_WN32A(dst, tmpval);                          \
    }
#else
#define LE_CENTRIC_MUL(dst, src, scale, dc)              \
    {                                                    \
        unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
        AV_WN32A(dst, tmpval);                           \
    }
#endif
 293
 294 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 295                         int h, int stride, int scale, unsigned dc)
 296 {
 297     int i;
 298     dc *= 0x10001;
 299
 300     switch (log2w) {
 301     case 0:
 302         for (i = 0; i < h; i++) {
 303             dst[0] = scale * src[0] + dc;
 304             if (scale)
 305                 src += stride;
 306             dst += stride;
 307         }
 308         break;
 309     case 1:
 310         for (i = 0; i < h; i++) {
 311             LE_CENTRIC_MUL(dst, src, scale, dc);
 312             if (scale)
 313                 src += stride;
 314             dst += stride;
 315         }
 316         break;
 317     case 2:
 318         for (i = 0; i < h; i++) {
 319             LE_CENTRIC_MUL(dst, src, scale, dc);
 320             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 321             if (scale)
 322                 src += stride;
 323             dst += stride;
 324         }
 325         break;
 326     case 3:
 327         for (i = 0; i < h; i++) {
 328             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 329             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 330             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 331             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 332             if (scale)
 333                 src += stride;
 334             dst += stride;
 335         }
 336         break;
 337     default:
 338         break;
 339     }
 340 }
 341
 342 static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 343                           int log2w, int log2h, int stride)
 344 {
 345     int index, h, code, ret, scale = 1;
 346     uint16_t *start, *end;
 347     unsigned dc = 0;
 348
 349     if (log2h < 0 || log2w < 0)
 350         return AVERROR_INVALIDDATA;
 351
 352     index = size2index[log2h][log2w];
 353     if (index < 0)
 354         return AVERROR_INVALIDDATA;
 355
 356     h     = 1 << log2h;
 357     code  = bitstream_read_vlc(&f->bc, block_type_vlc[1 - (f->version > 1)][index].table,
 358                                BLOCK_TYPE_VLC_BITS, 1);
 359     if (code < 0 || code > 6)
 360         return AVERROR_INVALIDDATA;
 361
 362     start = f->last_frame_buffer;
 363     end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 364
 365     if (code == 1) {
 366         if (--log2h < 0)
 367             return AVERROR_INVALIDDATA;
 368         if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
 369             return ret;
 370         return decode_p_block(f, dst + (stride << log2h),
 371                               src + (stride << log2h),
 372                               log2w, log2h, stride);
 373     } else if (code == 2) {
 374         log2w--;
 375         if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
 376             return ret;
 377         return decode_p_block(f, dst + (1 << log2w),
 378                               src + (1 << log2w),
 379                               log2w, log2h, stride);
 380     } else if (code == 6) {
 381         if (log2w) {
 382             dst[0]      = bytestream2_get_le16(&f->g2);
 383             dst[1]      = bytestream2_get_le16(&f->g2);
 384         } else {
 385             dst[0]      = bytestream2_get_le16(&f->g2);
 386             dst[stride] = bytestream2_get_le16(&f->g2);
 387         }
 388         return 0;
 389     }
 390
 391     if (code == 0) {
 392         src  += f->mv[bytestream2_get_byte(&f->g)];
 393     } else if (code == 3 && f->version >= 2) {
 394         return 0;
 395     } else if (code == 4) {
 396         src  += f->mv[bytestream2_get_byte(&f->g)];
 397         dc    = bytestream2_get_le16(&f->g2);
 398     } else if (code == 5) {
 399         scale = 0;
 400         dc    = bytestream2_get_le16(&f->g2);
 401     }
 402
 403     if (start > src || src > end) {
 404         av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 405         return AVERROR_INVALIDDATA;
 406     }
 407
 408     mcdc(dst, src, log2w, h, stride, scale, dc);
 409
 410     return 0;
 411 }
 412
 413 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 414 {
 415     int x, y;
 416     const int width  = f->avctx->width;
 417     const int height = f->avctx->height;
 418     uint16_t *dst    = f->frame_buffer;
 419     uint16_t *src;
 420     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 421                  bytestream_offset, wordstream_offset;
 422     int ret;
 423
 424     src = f->last_frame_buffer;
 425
 426     if (f->version > 1) {
 427         if (length < 20)
 428             return AVERROR_INVALIDDATA;
 429         extra           = 20;
 430         bitstream_size  = AV_RL32(buf + 8);
 431         wordstream_size = AV_RL32(buf + 12);
 432         bytestream_size = AV_RL32(buf + 16);
 433     } else {
 434         extra           = 0;
 435         bitstream_size  = AV_RL16(buf - 4);
 436         wordstream_size = AV_RL16(buf - 2);
 437         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 438     }
 439
 440     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 441         || bitstream_size  > (1 << 26)
 442         || bytestream_size > (1 << 26)
 443         || wordstream_size > (1 << 26)) {
 444         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 445                bitstream_size, bytestream_size, wordstream_size,
 446                bitstream_size + bytestream_size + wordstream_size - length);
 447         return AVERROR_INVALIDDATA;
 448     }
 449
 450     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 451                    bitstream_size + AV_INPUT_BUFFER_PADDING_SIZE);
 452     if (!f->bitstream_buffer)
 453         return AVERROR(ENOMEM);
 454     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
 455                        bitstream_size / 4);
 456     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 457            0, AV_INPUT_BUFFER_PADDING_SIZE);
 458     bitstream_init8(&f->bc, f->bitstream_buffer, bitstream_size);
 459
 460     wordstream_offset = extra + bitstream_size;
 461     bytestream_offset = extra + bitstream_size + wordstream_size;
 462     bytestream2_init(&f->g2, buf + wordstream_offset,
 463                      length - wordstream_offset);
 464     bytestream2_init(&f->g, buf + bytestream_offset,
 465                      length - bytestream_offset);
 466
 467     init_mv(f, width * 2);
 468
 469     for (y = 0; y < height; y += 8) {
 470         for (x = 0; x < width; x += 8)
 471             if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, width)) < 0)
 472                 return ret;
 473         src += 8 * width;
 474         dst += 8 * width;
 475     }
 476
 477     return 0;
 478 }
 479
 480 /**
 481  * decode block and dequantize.
 482  * Note this is almost identical to MJPEG.
 483  */
 484 static int decode_i_block(FourXContext *f, int16_t *block)
 485 {
 486     int code, i, j, level, val;
 487
 488     /* DC coef */
 489     val = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 490     if (val >> 4)
 491         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 492
 493     if (val)
 494         val = bitstream_read_xbits(&f->bc, val);
 495
 496     val        = val * dequant_table[0] + f->last_dc;
 497     f->last_dc = block[0] = val;
 498     /* AC coefs */
 499     i = 1;
 500     for (;;) {
 501         code = bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 502
 503         /* EOB */
 504         if (code == 0)
 505             break;
 506         if (code == 0xf0) {
 507             i += 16;
 508         } else {
 509             level = bitstream_read_xbits(&f->bc, code & 0xf);
 510             i    += code >> 4;
 511             if (i >= 64) {
 512                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 513                 return 0;
 514             }
 515
 516             j = ff_zigzag_direct[i];
 517             block[j] = level * dequant_table[j];
 518             i++;
 519             if (i >= 64)
 520                 break;
 521         }
 522     }
 523
 524     return 0;
 525 }
 526
 527 static inline void idct_put(FourXContext *f, int x, int y)
 528 {
 529     int16_t (*block)[64] = f->block;
 530     int stride           = f->avctx->width;
 531     int i;
 532     uint16_t *dst = f->frame_buffer + y * stride + x;
 533
 534     for (i = 0; i < 4; i++) {
 535         block[i][0] += 0x80 * 8 * 8;
 536         idct(block[i]);
 537     }
 538
 539     if (!(f->avctx->flags & AV_CODEC_FLAG_GRAY)) {
 540         for (i = 4; i < 6; i++)
 541             idct(block[i]);
 542     }
 543
 544     /* Note transform is:
 545      * y  = ( 1b + 4g + 2r) / 14
 546      * cb = ( 3b - 2g - 1r) / 14
 547      * cr = (-1b - 4g + 5r) / 14 */
 548     for (y = 0; y < 8; y++) {
 549         for (x = 0; x < 8; x++) {
 550             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 551                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 552             int cb = block[4][x + 8 * y];
 553             int cr = block[5][x + 8 * y];
 554             int cg = (cb + cr) >> 1;
 555             int y;
 556
 557             cb += cb;
 558
 559             y               = temp[0];
 560             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 561             y               = temp[1];
 562             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 563             y               = temp[8];
 564             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 565             y               = temp[9];
 566             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 567             dst            += 2;
 568         }
 569         dst += 2 * stride - 2 * 8;
 570     }
 571 }
 572
 573 static int decode_i_mb(FourXContext *f)
 574 {
 575     int ret;
 576     int i;
 577
 578     f->bdsp.clear_blocks(f->block[0]);
 579
 580     for (i = 0; i < 6; i++)
 581         if ((ret = decode_i_block(f, f->block[i])) < 0)
 582             return ret;
 583
 584     return 0;
 585 }
 586
 587 static const uint8_t *read_huffman_tables(FourXContext *f,
 588                                           const uint8_t * const buf,
 589                                           int len)
 590 {
 591     int frequency[512] = { 0 };
 592     uint8_t flag[512];
 593     int up[512];
 594     uint8_t len_tab[257];
 595     int bits_tab[257];
 596     int start, end;
 597     const uint8_t *ptr = buf;
 598     int j;
 599
 600     memset(up, -1, sizeof(up));
 601
 602     start = *ptr++;
 603     end   = *ptr++;
 604     for (;;) {
 605         int i;
 606
 607         len -= end - start + 1;
 608
 609         if (end < start || len < 0)
 610             return NULL;
 611
 612         for (i = start; i <= end; i++)
 613             frequency[i] = *ptr++;
 614         start = *ptr++;
 615         if (start == 0)
 616             break;
 617
 618         if (--len < 0)
 619             return NULL;
 620
 621         end = *ptr++;
 622     }
 623     frequency[256] = 1;
 624
 625     while ((ptr - buf) & 3)
 626         ptr++; // 4byte align
 627
 628     for (j = 257; j < 512; j++) {
 629         int min_freq[2] = { 256 * 256, 256 * 256 };
 630         int smallest[2] = { 0, 0 };
 631         int i;
 632         for (i = 0; i < j; i++) {
 633             if (frequency[i] == 0)
 634                 continue;
 635             if (frequency[i] < min_freq[1]) {
 636                 if (frequency[i] < min_freq[0]) {
 637                     min_freq[1] = min_freq[0];
 638                     smallest[1] = smallest[0];
 639                     min_freq[0] = frequency[i];
 640                     smallest[0] = i;
 641                 } else {
 642                     min_freq[1] = frequency[i];
 643                     smallest[1] = i;
 644                 }
 645             }
 646         }
 647         if (min_freq[1] == 256 * 256)
 648             break;
 649
 650         frequency[j]           = min_freq[0] + min_freq[1];
 651         flag[smallest[0]]      = 0;
 652         flag[smallest[1]]      = 1;
 653         up[smallest[0]]        =
 654         up[smallest[1]]        = j;
 655         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 656     }
 657
 658     for (j = 0; j < 257; j++) {
 659         int node, len = 0, bits = 0;
 660
 661         for (node = j; up[node] != -1; node = up[node]) {
 662             bits += flag[node] << len;
 663             len++;
 664             if (len > 31)
 665                 // can this happen at all ?
 666                 av_log(f->avctx, AV_LOG_ERROR,
 667                        "vlc length overflow\n");
 668         }
 669
 670         bits_tab[j] = bits;
 671         len_tab[j]  = len;
 672     }
 673
 674     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 675                  bits_tab, 4, 4, 0))
 676         return NULL;
 677
 678     return ptr;
 679 }
 680
 681 static int mix(int c0, int c1)
 682 {
 683     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 684     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 685     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 686     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 687 }
 688
 689 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 690 {
 691     int x, y, x2, y2;
 692     const int width  = f->avctx->width;
 693     const int height = f->avctx->height;
 694     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 695     uint16_t *dst    = f->frame_buffer;
 696     GetByteContext g3;
 697
 698     if (length < mbs * 8) {
 699         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 700         return AVERROR_INVALIDDATA;
 701     }
 702     bytestream2_init(&g3, buf, length);
 703
 704     for (y = 0; y < height; y += 16) {
 705         for (x = 0; x < width; x += 16) {
 706             unsigned int color[4] = { 0 }, bits;
 707             // warning following is purely guessed ...
 708             color[0] = bytestream2_get_le16u(&g3);
 709             color[1] = bytestream2_get_le16u(&g3);
 710
 711             if (color[0] & 0x8000)
 712                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
 713             if (color[1] & 0x8000)
 714                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
 715
 716             color[2] = mix(color[0], color[1]);
 717             color[3] = mix(color[1], color[0]);
 718
 719             bits = bytestream2_get_le32u(&g3);
 720             for (y2 = 0; y2 < 16; y2++) {
 721                 for (x2 = 0; x2 < 16; x2++) {
 722                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 723                     dst[y2 * width + x2] = color[(bits >> index) & 3];
 724                 }
 725             }
 726             dst += 16;
 727         }
 728         dst += 16 * width - x;
 729     }
 730
 731     return 0;
 732 }
 733
 734 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 735 {
 736     int x, y, ret;
 737     const int width  = f->avctx->width;
 738     const int height = f->avctx->height;
 739     const unsigned int bitstream_size = AV_RL32(buf);
 740     int token_count av_unused;
 741     unsigned int prestream_size;
 742     const uint8_t *prestream;
 743
 744     if (bitstream_size > (1 << 26))
 745         return AVERROR_INVALIDDATA;
 746
 747     if (length < bitstream_size + 12) {
 748         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 749         return AVERROR_INVALIDDATA;
 750     }
 751
 752     token_count    =     AV_RL32(buf + bitstream_size + 8);
 753     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 754     prestream      =             buf + bitstream_size + 12;
 755
 756     if (prestream_size + bitstream_size + 12 != length
 757         || prestream_size > (1 << 26)) {
 758         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 759                prestream_size, bitstream_size, length);
 760         return AVERROR_INVALIDDATA;
 761     }
 762
 763     prestream = read_huffman_tables(f, prestream, prestream_size);
 764     if (!prestream) {
 765         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 766         return AVERROR_INVALIDDATA;
 767     }
 768
 769     bitstream_init8(&f->bc, buf + 4, bitstream_size);
 770
 771     prestream_size = length + buf - prestream;
 772
 773     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 774                    prestream_size + AV_INPUT_BUFFER_PADDING_SIZE);
 775     if (!f->bitstream_buffer)
 776         return AVERROR(ENOMEM);
 777     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
 778                        prestream_size / 4);
 779     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 780            0, AV_INPUT_BUFFER_PADDING_SIZE);
 781     bitstream_init8(&f->pre_bc, f->bitstream_buffer, prestream_size);
 782
 783     f->last_dc = 0 * 128 * 8 * 8;
 784
 785     for (y = 0; y < height; y += 16) {
 786         for (x = 0; x < width; x += 16) {
 787             if ((ret = decode_i_mb(f)) < 0)
 788                 return ret;
 789
 790             idct_put(f, x, y);
 791         }
 792     }
 793
 794     if (bitstream_read_vlc(&f->pre_bc, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 795         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 796
 797     return 0;
 798 }
 799
 800 static int decode_frame(AVCodecContext *avctx, void *data,
 801                         int *got_frame, AVPacket *avpkt)
 802 {
 803     const uint8_t *buf    = avpkt->data;
 804     int buf_size          = avpkt->size;
 805     FourXContext *const f = avctx->priv_data;
 806     AVFrame *picture      = data;
 807     int i, frame_4cc, frame_size, ret;
 808
 809     if (buf_size < 20)
 810         return AVERROR_INVALIDDATA;
 811
 812     if (avctx->width % 16 || avctx->height % 16) {
 813         av_log(avctx, AV_LOG_ERROR,
 814                "Dimensions non-multiple of 16 are invalid.\n");
 815         return AVERROR_INVALIDDATA;
 816     }
 817
 818     if (buf_size < AV_RL32(buf + 4) + 8) {
 819         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %"PRIu32"\n",
 820                buf_size, AV_RL32(buf + 4));
 821         return AVERROR_INVALIDDATA;
 822     }
 823
 824     frame_4cc = AV_RL32(buf);
 825
 826     if (frame_4cc == AV_RL32("cfrm")) {
 827         int free_index       = -1;
 828         int id, whole_size;
 829         const int data_size  = buf_size - 20;
 830         CFrameBuffer *cfrm;
 831
 832         id         = AV_RL32(buf + 12);
 833         whole_size = AV_RL32(buf + 16);
 834
 835         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 836             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 837                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 838                        f->cfrm[i].id);
 839
 840         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 841             if (f->cfrm[i].id == id)
 842                 break;
 843             if (f->cfrm[i].size == 0)
 844                 free_index = i;
 845         }
 846
 847         if (i >= CFRAME_BUFFER_COUNT) {
 848             i             = free_index;
 849             f->cfrm[i].id = id;
 850         }
 851         cfrm = &f->cfrm[i];
 852
 853         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 854                                      cfrm->size + data_size + AV_INPUT_BUFFER_PADDING_SIZE);
 855         // explicit check needed as memcpy below might not catch a NULL
 856         if (!cfrm->data) {
 857             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 858             return AVERROR(ENOMEM);
 859         }
 860
 861         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 862         cfrm->size += data_size;
 863
 864         if (cfrm->size >= whole_size) {
 865             buf        = cfrm->data;
 866             frame_size = cfrm->size;
 867
 868             if (id != avctx->frame_number)
 869                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 870                        id, avctx->frame_number);
 871
 872             if (f->version <= 1)
 873                 return AVERROR_INVALIDDATA;
 874
 875             cfrm->size = cfrm->id = 0;
 876             frame_4cc  = AV_RL32("pfrm");
 877         } else
 878             return buf_size;
 879     } else {
 880         buf        = buf      + 12;
 881         frame_size = buf_size - 12;
 882     }
 883
 884
 885     if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
 886         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 887         return ret;
 888     }
 889
 890     if (frame_4cc == AV_RL32("ifr2")) {
 891         picture->pict_type = AV_PICTURE_TYPE_I;
 892         if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0)
 893             return ret;
 894     } else if (frame_4cc == AV_RL32("ifrm")) {
 895         picture->pict_type = AV_PICTURE_TYPE_I;
 896         if ((ret = decode_i_frame(f, buf, frame_size)) < 0)
 897             return ret;
 898     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 899         picture->pict_type = AV_PICTURE_TYPE_P;
 900         if ((ret = decode_p_frame(f, buf, frame_size)) < 0)
 901             return ret;
 902     } else if (frame_4cc == AV_RL32("snd_")) {
 903         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 904                buf_size);
 905     } else {
 906         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 907                buf_size);
 908     }
 909
 910     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 911
 912     av_image_copy_plane(picture->data[0], picture->linesize[0],
 913                         (const uint8_t*)f->frame_buffer,  avctx->width * 2,
 914                         avctx->width * 2, avctx->height);
 915     FFSWAP(uint16_t *, f->frame_buffer, f->last_frame_buffer);
 916
 917     *got_frame = 1;
 918
 919     emms_c();
 920
 921     return buf_size;
 922 }
 923
 924 static av_cold int decode_end(AVCodecContext *avctx)
 925 {
 926     FourXContext * const f = avctx->priv_data;
 927     int i;
 928
 929     av_freep(&f->frame_buffer);
 930     av_freep(&f->last_frame_buffer);
 931     av_freep(&f->bitstream_buffer);
 932     f->bitstream_buffer_size = 0;
 933     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 934         av_freep(&f->cfrm[i].data);
 935         f->cfrm[i].allocated_size = 0;
 936     }
 937     ff_free_vlc(&f->pre_vlc);
 938
 939     return 0;
 940 }
 941
 942 static av_cold int decode_init(AVCodecContext *avctx)
 943 {
 944     FourXContext * const f = avctx->priv_data;
 945     int ret;
 946
 947     if (avctx->extradata_size != 4 || !avctx->extradata) {
 948         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 949         return AVERROR_INVALIDDATA;
 950     }
 951
 952     ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
 953     if (ret < 0)
 954         return ret;
 955
 956     f->frame_buffer      = av_mallocz(avctx->width * avctx->height * 2);
 957     f->last_frame_buffer = av_mallocz(avctx->width * avctx->height * 2);
 958     if (!f->frame_buffer || !f->last_frame_buffer) {
 959         decode_end(avctx);
 960         return AVERROR(ENOMEM);
 961     }
 962
 963     f->version = AV_RL32(avctx->extradata) >> 16;
 964     ff_blockdsp_init(&f->bdsp);
 965     ff_bswapdsp_init(&f->bbdsp);
 966     f->avctx = avctx;
 967     init_vlcs(f);
 968
 969     if (f->version > 2)
 970         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 971     else
 972         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 973
 974     return 0;
 975 }
 976
 977 AVCodec ff_fourxm_decoder = {
 978     .name           = "4xm",
 979     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 980     .type           = AVMEDIA_TYPE_VIDEO,
 981     .id             = AV_CODEC_ID_4XM,
 982     .priv_data_size = sizeof(FourXContext),
 983     .init           = decode_init,
 984     .close          = decode_end,
 985     .decode         = decode_frame,
 986     .capabilities   = AV_CODEC_CAP_DR1,
 987 };