git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/frame.h"
  28 #include "libavutil/imgutils.h"
  29 #include "libavutil/intreadwrite.h"
  30 #include "avcodec.h"
  31 #include "bytestream.h"
  32 #include "dsputil.h"
  33 #include "get_bits.h"
  34 #include "internal.h"
  35
/* Table lookup width, in bits, used for the P-frame block-type VLCs. */
#define BLOCK_TYPE_VLC_BITS 5
/* Table lookup width, in bits, used for the I-frame run/size prefix VLC. */
#define ACDC_VLC_BITS 9

/* Number of CFrameBuffer slots kept in FourXContext.cfrm[]. */
#define CFRAME_BUFFER_COUNT 100
  40
  41 static const uint8_t block_type_tab[2][4][8][2] = {
  42     {
  43         {    // { 8, 4, 2 } x { 8, 4, 2}
  44             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  45         }, { // { 8, 4 } x 1
  46             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  47         }, { // 1 x { 8, 4 }
  48             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  49         }, { // 1 x 2, 2 x 1
  50             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  51         }
  52     }, {
  53         {   // { 8, 4, 2 } x { 8, 4, 2}
  54             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  55         }, {// { 8, 4 } x 1
  56             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  57         }, {// 1 x { 8, 4 }
  58             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  59         }, {// 1 x 2, 2 x 1
  60             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  61       }
  62     }
  63 };
  64
  65 static const uint8_t size2index[4][4] = {
  66     { -1, 3, 1, 1 },
  67     {  3, 0, 0, 0 },
  68     {  2, 0, 0, 0 },
  69     {  2, 0, 0, 0 },
  70 };
  71
  72 static const int8_t mv[256][2] = {
  73     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  74     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  75     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  76     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  77     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  78     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  79     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  80     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  81     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  82     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  83     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  84     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  85     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  86     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  87     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  88     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  89     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  90     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  91     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  92     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  93     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  94     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  95     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  96     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  97     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
  98     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
  99     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 100     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 101     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 102     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 103     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 104     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 105 };
 106
 107 /* This is simply the scaled down elementwise product of the standard JPEG
 108  * quantizer table and the AAN premul table. */
 109 static const uint8_t dequant_table[64] = {
 110     16, 15, 13, 19, 24, 31, 28, 17,
 111     17, 23, 25, 31, 36, 63, 45, 21,
 112     18, 24, 27, 37, 52, 59, 49, 20,
 113     16, 28, 34, 40, 60, 80, 51, 20,
 114     18, 31, 48, 66, 68, 86, 56, 21,
 115     19, 38, 56, 59, 64, 64, 48, 20,
 116     27, 48, 55, 55, 56, 51, 35, 15,
 117     20, 35, 34, 32, 31, 22, 15,  8,
 118 };
 119
/* Block-type VLCs, [table set][shape class]; filled in by init_vlcs() from
 * block_type_tab and read by decode_p_block(). */
static VLC block_type_vlc[2][4];
 121
 122
/**
 * Accumulation buffer for one frame that arrives split over several
 * "cfrm" packets (see decode_frame()).
 */
typedef struct CFrameBuffer {
    unsigned int allocated_size; /* capacity of data, maintained by av_fast_realloc() */
    unsigned int size;           /* number of payload bytes collected so far; 0 marks a free slot */
    int id;                      /* frame id taken from the packet header; reset to 0 once the frame is complete */
    uint8_t *data;               /* collected payload bytes */
} CFrameBuffer;
 129
/** Private decoder state. */
typedef struct FourXContext {
    AVCodecContext *avctx;         /* used for logging and for the frame dimensions */
    DSPContext dsp;                /* provides bswap_buf() and clear_blocks() */
    uint16_t *frame_buffer;        /* 16-bit pixels of the frame being decoded */
    uint16_t *last_frame_buffer;   /* 16-bit pixels P-frame blocks are predicted from */
    GetBitContext pre_gb;          ///< ac/dc prefix
    GetBitContext gb;              /* block-type codes (P-frames) / coefficient bits (I-frames) */
    GetByteContext g;              /* P-frame byte stream: motion vector indices */
    GetByteContext g2;             /* P-frame word stream: 16-bit values */
    int mv[256];                   /* per-index buffer offsets, rebuilt by init_mv() */
    VLC pre_vlc;                   /* run/size VLC built by read_huffman_tables() */
    int last_dc;                   /* DC predictor carried between I-frame blocks */
    DECLARE_ALIGNED(16, int16_t, block)[6][64]; /* coefficients of one macroblock */
    void *bitstream_buffer;        /* byte-swapped copy of the bit stream being read */
    unsigned int bitstream_buffer_size; /* capacity of bitstream_buffer (av_fast_malloc) */
    int version;                   /* stream version; selects tables and header layout */
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; /* reassembly slots for "cfrm" packets */
} FourXContext;
 148
 149
 150 #define FIX_1_082392200  70936
 151 #define FIX_1_414213562  92682
 152 #define FIX_1_847759065 121095
 153 #define FIX_2_613125930 171254
 154
 155 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 156
 157 static void idct(int16_t block[64])
 158 {
 159     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 160     int tmp10, tmp11, tmp12, tmp13;
 161     int z5, z10, z11, z12, z13;
 162     int i;
 163     int temp[64];
 164
 165     for (i = 0; i < 8; i++) {
 166         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 167         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 168
 169         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 170         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 171
 172         tmp0 = tmp10 + tmp13;
 173         tmp3 = tmp10 - tmp13;
 174         tmp1 = tmp11 + tmp12;
 175         tmp2 = tmp11 - tmp12;
 176
 177         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 178         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 179         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 180         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 181
 182         tmp7  =          z11 + z13;
 183         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 184
 185         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 186         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 187         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 188
 189         tmp6 = tmp12 - tmp7;
 190         tmp5 = tmp11 - tmp6;
 191         tmp4 = tmp10 + tmp5;
 192
 193         temp[8 * 0 + i] = tmp0 + tmp7;
 194         temp[8 * 7 + i] = tmp0 - tmp7;
 195         temp[8 * 1 + i] = tmp1 + tmp6;
 196         temp[8 * 6 + i] = tmp1 - tmp6;
 197         temp[8 * 2 + i] = tmp2 + tmp5;
 198         temp[8 * 5 + i] = tmp2 - tmp5;
 199         temp[8 * 4 + i] = tmp3 + tmp4;
 200         temp[8 * 3 + i] = tmp3 - tmp4;
 201     }
 202
 203     for (i = 0; i < 8 * 8; i += 8) {
 204         tmp10 = temp[0 + i] + temp[4 + i];
 205         tmp11 = temp[0 + i] - temp[4 + i];
 206
 207         tmp13 = temp[2 + i] + temp[6 + i];
 208         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 209
 210         tmp0 = tmp10 + tmp13;
 211         tmp3 = tmp10 - tmp13;
 212         tmp1 = tmp11 + tmp12;
 213         tmp2 = tmp11 - tmp12;
 214
 215         z13 = temp[5 + i] + temp[3 + i];
 216         z10 = temp[5 + i] - temp[3 + i];
 217         z11 = temp[1 + i] + temp[7 + i];
 218         z12 = temp[1 + i] - temp[7 + i];
 219
 220         tmp7  = z11 + z13;
 221         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 222
 223         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 224         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 225         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 226
 227         tmp6 = tmp12 - tmp7;
 228         tmp5 = tmp11 - tmp6;
 229         tmp4 = tmp10 + tmp5;
 230
 231         block[0 + i] = (tmp0 + tmp7) >> 6;
 232         block[7 + i] = (tmp0 - tmp7) >> 6;
 233         block[1 + i] = (tmp1 + tmp6) >> 6;
 234         block[6 + i] = (tmp1 - tmp6) >> 6;
 235         block[2 + i] = (tmp2 + tmp5) >> 6;
 236         block[5 + i] = (tmp2 - tmp5) >> 6;
 237         block[4 + i] = (tmp3 + tmp4) >> 6;
 238         block[3 + i] = (tmp3 - tmp4) >> 6;
 239     }
 240 }
 241
 242 static av_cold void init_vlcs(FourXContext *f)
 243 {
 244     static VLC_TYPE table[2][4][32][2];
 245     int i, j;
 246
 247     for (i = 0; i < 2; i++) {
 248         for (j = 0; j < 4; j++) {
 249             block_type_vlc[i][j].table           = table[i][j];
 250             block_type_vlc[i][j].table_allocated = 32;
 251             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 252                      &block_type_tab[i][j][0][1], 2, 1,
 253                      &block_type_tab[i][j][0][0], 2, 1,
 254                      INIT_VLC_USE_NEW_STATIC);
 255         }
 256     }
 257 }
 258
 259 static void init_mv(FourXContext *f, int linesize)
 260 {
 261     int i;
 262
 263     for (i = 0; i < 256; i++) {
 264         if (f->version > 1)
 265             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 266         else
 267             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 268     }
 269 }
 270
 271 #if HAVE_BIGENDIAN
 272 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 273     {                                                   \
 274         unsigned tmpval = AV_RN32(src);                 \
 275         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 276         tmpval = tmpval * (scale) + (dc);               \
 277         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 278         AV_WN32A(dst, tmpval);                          \
 279     }
 280 #else
 281 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 282     {                                                    \
 283         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 284         AV_WN32A(dst, tmpval);                           \
 285     }
 286 #endif
 287
 288 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 289                         int h, int stride, int scale, unsigned dc)
 290 {
 291     int i;
 292     dc *= 0x10001;
 293
 294     switch (log2w) {
 295     case 0:
 296         for (i = 0; i < h; i++) {
 297             dst[0] = scale * src[0] + dc;
 298             if (scale)
 299                 src += stride;
 300             dst += stride;
 301         }
 302         break;
 303     case 1:
 304         for (i = 0; i < h; i++) {
 305             LE_CENTRIC_MUL(dst, src, scale, dc);
 306             if (scale)
 307                 src += stride;
 308             dst += stride;
 309         }
 310         break;
 311     case 2:
 312         for (i = 0; i < h; i++) {
 313             LE_CENTRIC_MUL(dst, src, scale, dc);
 314             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 315             if (scale)
 316                 src += stride;
 317             dst += stride;
 318         }
 319         break;
 320     case 3:
 321         for (i = 0; i < h; i++) {
 322             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 323             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 324             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 325             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 326             if (scale)
 327                 src += stride;
 328             dst += stride;
 329         }
 330         break;
 331     default:
 332         break;
 333     }
 334 }
 335
 336 static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 337                           int log2w, int log2h, int stride)
 338 {
 339     const int index = size2index[log2h][log2w];
 340     const int h     = 1 << log2h;
 341     int code        = get_vlc2(&f->gb,
 342                                block_type_vlc[1 - (f->version > 1)][index].table,
 343                                BLOCK_TYPE_VLC_BITS, 1);
 344     uint16_t *start = f->last_frame_buffer;
 345     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 346     int ret;
 347     int scale   = 1;
 348     unsigned dc = 0;
 349
 350     if (code < 0 || code > 6 || log2w < 0)
 351         return AVERROR_INVALIDDATA;
 352
 353     if (code == 1) {
 354         log2h--;
 355         if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
 356             return ret;
 357         return decode_p_block(f, dst + (stride << log2h),
 358                               src + (stride << log2h),
 359                               log2w, log2h, stride);
 360     } else if (code == 2) {
 361         log2w--;
 362         if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
 363             return ret;
 364         return decode_p_block(f, dst + (1 << log2w),
 365                               src + (1 << log2w),
 366                               log2w, log2h, stride);
 367     } else if (code == 6) {
 368         if (log2w) {
 369             dst[0]      = bytestream2_get_le16(&f->g2);
 370             dst[1]      = bytestream2_get_le16(&f->g2);
 371         } else {
 372             dst[0]      = bytestream2_get_le16(&f->g2);
 373             dst[stride] = bytestream2_get_le16(&f->g2);
 374         }
 375         return 0;
 376     }
 377
 378     if (code == 0) {
 379         src  += f->mv[bytestream2_get_byte(&f->g)];
 380     } else if (code == 3 && f->version >= 2) {
 381         return 0;
 382     } else if (code == 4) {
 383         src  += f->mv[bytestream2_get_byte(&f->g)];
 384         dc    = bytestream2_get_le16(&f->g2);
 385     } else if (code == 5) {
 386         scale = 0;
 387         dc    = bytestream2_get_le16(&f->g2);
 388     }
 389
 390     if (start > src || src > end) {
 391         av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 392         return AVERROR_INVALIDDATA;
 393     }
 394
 395     mcdc(dst, src, log2w, h, stride, scale, dc);
 396
 397     return 0;
 398 }
 399
 400 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 401 {
 402     int x, y;
 403     const int width  = f->avctx->width;
 404     const int height = f->avctx->height;
 405     uint16_t *dst    = f->frame_buffer;
 406     uint16_t *src;
 407     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 408                  bytestream_offset, wordstream_offset;
 409     int ret;
 410
 411     src = f->last_frame_buffer;
 412
 413     if (f->version > 1) {
 414         if (length < 20)
 415             return AVERROR_INVALIDDATA;
 416         extra           = 20;
 417         bitstream_size  = AV_RL32(buf + 8);
 418         wordstream_size = AV_RL32(buf + 12);
 419         bytestream_size = AV_RL32(buf + 16);
 420     } else {
 421         extra           = 0;
 422         bitstream_size  = AV_RL16(buf - 4);
 423         wordstream_size = AV_RL16(buf - 2);
 424         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 425     }
 426
 427     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 428         || bitstream_size  > (1 << 26)
 429         || bytestream_size > (1 << 26)
 430         || wordstream_size > (1 << 26)) {
 431         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 432                bitstream_size, bytestream_size, wordstream_size,
 433                bitstream_size + bytestream_size + wordstream_size - length);
 434         return AVERROR_INVALIDDATA;
 435     }
 436
 437     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 438                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 439     if (!f->bitstream_buffer)
 440         return AVERROR(ENOMEM);
 441     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 442                      bitstream_size / 4);
 443     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 444            0, FF_INPUT_BUFFER_PADDING_SIZE);
 445     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 446
 447     wordstream_offset = extra + bitstream_size;
 448     bytestream_offset = extra + bitstream_size + wordstream_size;
 449     bytestream2_init(&f->g2, buf + wordstream_offset,
 450                      length - wordstream_offset);
 451     bytestream2_init(&f->g, buf + bytestream_offset,
 452                      length - bytestream_offset);
 453
 454     init_mv(f, width * 2);
 455
 456     for (y = 0; y < height; y += 8) {
 457         for (x = 0; x < width; x += 8)
 458             if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, width)) < 0)
 459                 return ret;
 460         src += 8 * width;
 461         dst += 8 * width;
 462     }
 463
 464     return 0;
 465 }
 466
 467 /**
 468  * decode block and dequantize.
 469  * Note this is almost identical to MJPEG.
 470  */
 471 static int decode_i_block(FourXContext *f, int16_t *block)
 472 {
 473     int code, i, j, level, val;
 474
 475     /* DC coef */
 476     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 477     if (val >> 4)
 478         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 479
 480     if (val)
 481         val = get_xbits(&f->gb, val);
 482
 483     val        = val * dequant_table[0] + f->last_dc;
 484     f->last_dc = block[0] = val;
 485     /* AC coefs */
 486     i = 1;
 487     for (;;) {
 488         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 489
 490         /* EOB */
 491         if (code == 0)
 492             break;
 493         if (code == 0xf0) {
 494             i += 16;
 495         } else {
 496             level = get_xbits(&f->gb, code & 0xf);
 497             i    += code >> 4;
 498             if (i >= 64) {
 499                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 500                 return 0;
 501             }
 502
 503             j = ff_zigzag_direct[i];
 504             block[j] = level * dequant_table[j];
 505             i++;
 506             if (i >= 64)
 507                 break;
 508         }
 509     }
 510
 511     return 0;
 512 }
 513
 514 static inline void idct_put(FourXContext *f, int x, int y)
 515 {
 516     int16_t (*block)[64] = f->block;
 517     int stride           = f->avctx->width;
 518     int i;
 519     uint16_t *dst = f->frame_buffer + y * stride + x;
 520
 521     for (i = 0; i < 4; i++) {
 522         block[i][0] += 0x80 * 8 * 8;
 523         idct(block[i]);
 524     }
 525
 526     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 527         for (i = 4; i < 6; i++)
 528             idct(block[i]);
 529     }
 530
 531     /* Note transform is:
 532      * y  = ( 1b + 4g + 2r) / 14
 533      * cb = ( 3b - 2g - 1r) / 14
 534      * cr = (-1b - 4g + 5r) / 14 */
 535     for (y = 0; y < 8; y++) {
 536         for (x = 0; x < 8; x++) {
 537             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 538                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 539             int cb = block[4][x + 8 * y];
 540             int cr = block[5][x + 8 * y];
 541             int cg = (cb + cr) >> 1;
 542             int y;
 543
 544             cb += cb;
 545
 546             y               = temp[0];
 547             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 548             y               = temp[1];
 549             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 550             y               = temp[8];
 551             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 552             y               = temp[9];
 553             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 554             dst            += 2;
 555         }
 556         dst += 2 * stride - 2 * 8;
 557     }
 558 }
 559
 560 static int decode_i_mb(FourXContext *f)
 561 {
 562     int ret;
 563     int i;
 564
 565     f->dsp.clear_blocks(f->block[0]);
 566
 567     for (i = 0; i < 6; i++)
 568         if ((ret = decode_i_block(f, f->block[i])) < 0)
 569             return ret;
 570
 571     return 0;
 572 }
 573
 574 static const uint8_t *read_huffman_tables(FourXContext *f,
 575                                           const uint8_t * const buf,
 576                                           int len)
 577 {
 578     int frequency[512] = { 0 };
 579     uint8_t flag[512];
 580     int up[512];
 581     uint8_t len_tab[257];
 582     int bits_tab[257];
 583     int start, end;
 584     const uint8_t *ptr = buf;
 585     int j;
 586
 587     memset(up, -1, sizeof(up));
 588
 589     start = *ptr++;
 590     end   = *ptr++;
 591     for (;;) {
 592         int i;
 593
 594         len -= end - start + 1;
 595
 596         if (end < start || len < 0)
 597             return NULL;
 598
 599         for (i = start; i <= end; i++)
 600             frequency[i] = *ptr++;
 601         start = *ptr++;
 602         if (start == 0)
 603             break;
 604
 605         if (--len < 0)
 606             return NULL;
 607
 608         end = *ptr++;
 609     }
 610     frequency[256] = 1;
 611
 612     while ((ptr - buf) & 3)
 613         ptr++; // 4byte align
 614
 615     for (j = 257; j < 512; j++) {
 616         int min_freq[2] = { 256 * 256, 256 * 256 };
 617         int smallest[2] = { 0, 0 };
 618         int i;
 619         for (i = 0; i < j; i++) {
 620             if (frequency[i] == 0)
 621                 continue;
 622             if (frequency[i] < min_freq[1]) {
 623                 if (frequency[i] < min_freq[0]) {
 624                     min_freq[1] = min_freq[0];
 625                     smallest[1] = smallest[0];
 626                     min_freq[0] = frequency[i];
 627                     smallest[0] = i;
 628                 } else {
 629                     min_freq[1] = frequency[i];
 630                     smallest[1] = i;
 631                 }
 632             }
 633         }
 634         if (min_freq[1] == 256 * 256)
 635             break;
 636
 637         frequency[j]           = min_freq[0] + min_freq[1];
 638         flag[smallest[0]]      = 0;
 639         flag[smallest[1]]      = 1;
 640         up[smallest[0]]        =
 641         up[smallest[1]]        = j;
 642         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 643     }
 644
 645     for (j = 0; j < 257; j++) {
 646         int node, len = 0, bits = 0;
 647
 648         for (node = j; up[node] != -1; node = up[node]) {
 649             bits += flag[node] << len;
 650             len++;
 651             if (len > 31)
 652                 // can this happen at all ?
 653                 av_log(f->avctx, AV_LOG_ERROR,
 654                        "vlc length overflow\n");
 655         }
 656
 657         bits_tab[j] = bits;
 658         len_tab[j]  = len;
 659     }
 660
 661     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 662                  bits_tab, 4, 4, 0))
 663         return NULL;
 664
 665     return ptr;
 666 }
 667
 668 static int mix(int c0, int c1)
 669 {
 670     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 671     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 672     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 673     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 674 }
 675
 676 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 677 {
 678     int x, y, x2, y2;
 679     const int width  = f->avctx->width;
 680     const int height = f->avctx->height;
 681     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 682     uint16_t *dst    = f->frame_buffer;
 683     GetByteContext g3;
 684
 685     if (length < mbs * 8) {
 686         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 687         return AVERROR_INVALIDDATA;
 688     }
 689     bytestream2_init(&g3, buf, length);
 690
 691     for (y = 0; y < height; y += 16) {
 692         for (x = 0; x < width; x += 16) {
 693             unsigned int color[4] = { 0 }, bits;
 694             // warning following is purely guessed ...
 695             color[0] = bytestream2_get_le16u(&g3);
 696             color[1] = bytestream2_get_le16u(&g3);
 697
 698             if (color[0] & 0x8000)
 699                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
 700             if (color[1] & 0x8000)
 701                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
 702
 703             color[2] = mix(color[0], color[1]);
 704             color[3] = mix(color[1], color[0]);
 705
 706             bits = bytestream2_get_le32u(&g3);
 707             for (y2 = 0; y2 < 16; y2++) {
 708                 for (x2 = 0; x2 < 16; x2++) {
 709                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 710                     dst[y2 * width + x2] = color[(bits >> index) & 3];
 711                 }
 712             }
 713             dst += 16;
 714         }
 715         dst += 16 * width - x;
 716     }
 717
 718     return 0;
 719 }
 720
 721 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 722 {
 723     int x, y, ret;
 724     const int width  = f->avctx->width;
 725     const int height = f->avctx->height;
 726     const unsigned int bitstream_size = AV_RL32(buf);
 727     int token_count av_unused;
 728     unsigned int prestream_size;
 729     const uint8_t *prestream;
 730
 731     if (bitstream_size > (1 << 26))
 732         return AVERROR_INVALIDDATA;
 733
 734     if (length < bitstream_size + 12) {
 735         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 736         return AVERROR_INVALIDDATA;
 737     }
 738
 739     token_count    =     AV_RL32(buf + bitstream_size + 8);
 740     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 741     prestream      =             buf + bitstream_size + 12;
 742
 743     if (prestream_size + bitstream_size + 12 != length
 744         || prestream_size > (1 << 26)) {
 745         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 746                prestream_size, bitstream_size, length);
 747         return AVERROR_INVALIDDATA;
 748     }
 749
 750     prestream = read_huffman_tables(f, prestream, prestream_size);
 751     if (!prestream) {
 752         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 753         return AVERROR_INVALIDDATA;
 754     }
 755
 756     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 757
 758     prestream_size = length + buf - prestream;
 759
 760     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 761                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 762     if (!f->bitstream_buffer)
 763         return AVERROR(ENOMEM);
 764     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 765                      prestream_size / 4);
 766     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 767            0, FF_INPUT_BUFFER_PADDING_SIZE);
 768     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 769
 770     f->last_dc = 0 * 128 * 8 * 8;
 771
 772     for (y = 0; y < height; y += 16) {
 773         for (x = 0; x < width; x += 16) {
 774             if ((ret = decode_i_mb(f)) < 0)
 775                 return ret;
 776
 777             idct_put(f, x, y);
 778         }
 779     }
 780
 781     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 782         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 783
 784     return 0;
 785 }
 786
 787 static int decode_frame(AVCodecContext *avctx, void *data,
 788                         int *got_frame, AVPacket *avpkt)
 789 {
 790     const uint8_t *buf    = avpkt->data;
 791     int buf_size          = avpkt->size;
 792     FourXContext *const f = avctx->priv_data;
 793     AVFrame *picture      = data;
 794     int i, frame_4cc, frame_size, ret;
 795
 796     if (buf_size < 20)
 797         return AVERROR_INVALIDDATA;
 798
 799     if (avctx->width % 16 || avctx->height % 16) {
 800         av_log(avctx, AV_LOG_ERROR,
 801                "Dimensions non-multiple of 16 are invalid.\n");
 802         return AVERROR_INVALIDDATA;
 803     }
 804
 805     if (buf_size < AV_RL32(buf + 4) + 8) {
 806         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 807                buf_size, AV_RL32(buf + 4));
 808         return AVERROR_INVALIDDATA;
 809     }
 810
 811     frame_4cc = AV_RL32(buf);
 812
 813     if (frame_4cc == AV_RL32("cfrm")) {
 814         int free_index       = -1;
 815         int id, whole_size;
 816         const int data_size  = buf_size - 20;
 817         CFrameBuffer *cfrm;
 818
 819         if (data_size < 0)
 820             return AVERROR_INVALIDDATA;
 821
 822         id         = AV_RL32(buf + 12);
 823         whole_size = AV_RL32(buf + 16);
 824
 825         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 826             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 827                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 828                        f->cfrm[i].id);
 829
 830         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 831             if (f->cfrm[i].id == id)
 832                 break;
 833             if (f->cfrm[i].size == 0)
 834                 free_index = i;
 835         }
 836
 837         if (i >= CFRAME_BUFFER_COUNT) {
 838             i             = free_index;
 839             f->cfrm[i].id = id;
 840         }
 841         cfrm = &f->cfrm[i];
 842
 843         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 844                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 845         // explicit check needed as memcpy below might not catch a NULL
 846         if (!cfrm->data) {
 847             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 848             return AVERROR(ENOMEM);
 849         }
 850
 851         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 852         cfrm->size += data_size;
 853
 854         if (cfrm->size >= whole_size) {
 855             buf        = cfrm->data;
 856             frame_size = cfrm->size;
 857
 858             if (id != avctx->frame_number)
 859                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 860                        id, avctx->frame_number);
 861
 862             if (f->version <= 1)
 863                 return AVERROR_INVALIDDATA;
 864
 865             cfrm->size = cfrm->id = 0;
 866             frame_4cc  = AV_RL32("pfrm");
 867         } else
 868             return buf_size;
 869     } else {
 870         buf        = buf      + 12;
 871         frame_size = buf_size - 12;
 872     }
 873
 874
 875     if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
 876         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 877         return ret;
 878     }
 879
 880     if (frame_4cc == AV_RL32("ifr2")) {
 881         picture->pict_type = AV_PICTURE_TYPE_I;
 882         if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0)
 883             return ret;
 884     } else if (frame_4cc == AV_RL32("ifrm")) {
 885         picture->pict_type = AV_PICTURE_TYPE_I;
 886         if ((ret = decode_i_frame(f, buf, frame_size)) < 0)
 887             return ret;
 888     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 889         picture->pict_type = AV_PICTURE_TYPE_P;
 890         if ((ret = decode_p_frame(f, buf, frame_size)) < 0)
 891             return ret;
 892     } else if (frame_4cc == AV_RL32("snd_")) {
 893         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 894                buf_size);
 895     } else {
 896         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 897                buf_size);
 898     }
 899
 900     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 901
 902     av_image_copy_plane(picture->data[0], picture->linesize[0],
 903                         (const uint8_t*)f->frame_buffer,  avctx->width * 2,
 904                         avctx->width * 2, avctx->height);
 905     FFSWAP(uint16_t *, f->frame_buffer, f->last_frame_buffer);
 906
 907     *got_frame = 1;
 908
 909     emms_c();
 910
 911     return buf_size;
 912 }
 913
 914 static av_cold int decode_end(AVCodecContext *avctx)
 915 {
 916     FourXContext * const f = avctx->priv_data;
 917     int i;
 918
 919     av_freep(&f->frame_buffer);
 920     av_freep(&f->last_frame_buffer);
 921     av_freep(&f->bitstream_buffer);
 922     f->bitstream_buffer_size = 0;
 923     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 924         av_freep(&f->cfrm[i].data);
 925         f->cfrm[i].allocated_size = 0;
 926     }
 927     ff_free_vlc(&f->pre_vlc);
 928
 929     return 0;
 930 }
 931
 932 static av_cold int decode_init(AVCodecContext *avctx)
 933 {
 934     FourXContext * const f = avctx->priv_data;
 935     int ret;
 936
 937     if (avctx->extradata_size != 4 || !avctx->extradata) {
 938         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 939         return AVERROR_INVALIDDATA;
 940     }
 941
 942     ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
 943     if (ret < 0)
 944         return ret;
 945
 946     f->frame_buffer      = av_mallocz(avctx->width * avctx->height * 2);
 947     f->last_frame_buffer = av_mallocz(avctx->width * avctx->height * 2);
 948     if (!f->frame_buffer || !f->last_frame_buffer) {
 949         decode_end(avctx);
 950         return AVERROR(ENOMEM);
 951     }
 952
 953     f->version = AV_RL32(avctx->extradata) >> 16;
 954     ff_dsputil_init(&f->dsp, avctx);
 955     f->avctx = avctx;
 956     init_vlcs(f);
 957
 958     if (f->version > 2)
 959         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 960     else
 961         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 962
 963     return 0;
 964 }
 965
 966 AVCodec ff_fourxm_decoder = {
 967     .name           = "4xm",
 968     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 969     .type           = AVMEDIA_TYPE_VIDEO,
 970     .id             = AV_CODEC_ID_4XM,
 971     .priv_data_size = sizeof(FourXContext),
 972     .init           = decode_init,
 973     .close          = decode_end,
 974     .decode         = decode_frame,
 975     .capabilities   = CODEC_CAP_DR1,
 976 };