git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "bytestream.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32 #include "internal.h"
  33
  34 //#undef NDEBUG
  35 //#include <assert.h>
  36
/* Bit-width argument handed to init_vlc()/get_vlc2() for the block-type VLCs. */
#define BLOCK_TYPE_VLC_BITS 5
/* Bit-width argument handed to init_vlc()/get_vlc2() for the AC/DC prefix VLC. */
#define ACDC_VLC_BITS 9

/* Number of CFrameBuffer slots kept in FourXContext.cfrm. */
#define CFRAME_BUFFER_COUNT 100
  41
  42 static const uint8_t block_type_tab[2][4][8][2] = {
  43     {
  44         {    // { 8, 4, 2 } x { 8, 4, 2}
  45             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  46         }, { // { 8, 4 } x 1
  47             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  48         }, { // 1 x { 8, 4 }
  49             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  50         }, { // 1 x 2, 2 x 1
  51             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  52         }
  53     }, {
  54         {   // { 8, 4, 2 } x { 8, 4, 2}
  55             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  56         }, {// { 8, 4 } x 1
  57             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  58         }, {// 1 x { 8, 4 }
  59             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  60         }, {// 1 x 2, 2 x 1
  61             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  62       }
  63     }
  64 };
  65
  66 static const uint8_t size2index[4][4] = {
  67     { -1, 3, 1, 1 },
  68     {  3, 0, 0, 0 },
  69     {  2, 0, 0, 0 },
  70     {  2, 0, 0, 0 },
  71 };
  72
  73 static const int8_t mv[256][2] = {
  74     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  75     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  76     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  77     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  78     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  79     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  80     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  81     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  82     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  83     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  84     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  85     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  86     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  87     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  88     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  89     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  90     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  91     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  92     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  93     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  94     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  95     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  96     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  97     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  98     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
  99     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
 100     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 101     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 102     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 103     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 104     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 105     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 106 };
 107
 108 /* This is simply the scaled down elementwise product of the standard JPEG
 109  * quantizer table and the AAN premul table. */
 110 static const uint8_t dequant_table[64] = {
 111     16, 15, 13, 19, 24, 31, 28, 17,
 112     17, 23, 25, 31, 36, 63, 45, 21,
 113     18, 24, 27, 37, 52, 59, 49, 20,
 114     16, 28, 34, 40, 60, 80, 51, 20,
 115     18, 31, 48, 66, 68, 86, 56, 21,
 116     19, 38, 56, 59, 64, 64, 48, 20,
 117     27, 48, 55, 55, 56, 51, 35, 15,
 118     20, 35, 34, 32, 31, 22, 15,  8,
 119 };
 120
/* Block-type VLC readers, one per block_type_tab[][] table; set up by
 * init_vlcs() and looked up in decode_p_block(). */
static VLC block_type_vlc[2][4];
 122
 123
/**
 * Accumulation buffer for one frame that arrives split across several
 * "cfrm" packets (see decode_frame()).
 */
typedef struct CFrameBuffer {
    unsigned int allocated_size; ///< capacity of data, maintained by av_fast_realloc()
    unsigned int size;           ///< number of payload bytes collected so far
    int id;                      ///< frame id read from the packet header; reset to 0 once the frame is complete
    uint8_t *data;               ///< collected payload
} CFrameBuffer;
 130
/**
 * Decoder state shared by all the decoding routines in this file.
 */
typedef struct FourXContext {
    AVCodecContext *avctx;         ///< codec context: logging, width/height, flags
    DSPContext dsp;                ///< supplies bswap_buf() and clear_blocks()
    AVFrame current_picture, last_picture; ///< frame being written / previous frame read by P blocks
    GetBitContext pre_gb;          ///< ac/dc prefix
    GetBitContext gb;              ///< main bitstream (block types, coefficient bits)
    GetByteContext g;              ///< byte stream of P frames: motion vector indices
    GetByteContext g2;             ///< word stream of P frames: 16-bit little-endian values
    int mv[256];                   ///< per-index pixel offsets, filled by init_mv()
    VLC pre_vlc;                   ///< AC/DC prefix VLC built by read_huffman_tables()
    int last_dc;                   ///< DC predictor carried between decode_i_block() calls
    DECLARE_ALIGNED(16, DCTELEM, block)[6][64]; ///< coefficients of one macroblock; idct_put() uses 0-3 as luma, 4 as cb, 5 as cr
    void *bitstream_buffer;        ///< scratch copy of the byte-swapped bitstream
    unsigned int bitstream_buffer_size; ///< capacity of bitstream_buffer, maintained by av_fast_malloc()
    int version;                   ///< stream version; selects VLC and motion vector tables (assigned outside the code visible here)
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< reassembly slots for "cfrm" packets
} FourXContext;
 148
 149
 150 #define FIX_1_082392200  70936
 151 #define FIX_1_414213562  92682
 152 #define FIX_1_847759065 121095
 153 #define FIX_2_613125930 171254
 154
 155 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 156
 157 static void idct(DCTELEM block[64])
 158 {
 159     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 160     int tmp10, tmp11, tmp12, tmp13;
 161     int z5, z10, z11, z12, z13;
 162     int i;
 163     int temp[64];
 164
 165     for (i = 0; i < 8; i++) {
 166         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 167         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 168
 169         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 170         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 171
 172         tmp0 = tmp10 + tmp13;
 173         tmp3 = tmp10 - tmp13;
 174         tmp1 = tmp11 + tmp12;
 175         tmp2 = tmp11 - tmp12;
 176
 177         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 178         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 179         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 180         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 181
 182         tmp7  =          z11 + z13;
 183         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 184
 185         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 186         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 187         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 188
 189         tmp6 = tmp12 - tmp7;
 190         tmp5 = tmp11 - tmp6;
 191         tmp4 = tmp10 + tmp5;
 192
 193         temp[8 * 0 + i] = tmp0 + tmp7;
 194         temp[8 * 7 + i] = tmp0 - tmp7;
 195         temp[8 * 1 + i] = tmp1 + tmp6;
 196         temp[8 * 6 + i] = tmp1 - tmp6;
 197         temp[8 * 2 + i] = tmp2 + tmp5;
 198         temp[8 * 5 + i] = tmp2 - tmp5;
 199         temp[8 * 4 + i] = tmp3 + tmp4;
 200         temp[8 * 3 + i] = tmp3 - tmp4;
 201     }
 202
 203     for (i = 0; i < 8 * 8; i += 8) {
 204         tmp10 = temp[0 + i] + temp[4 + i];
 205         tmp11 = temp[0 + i] - temp[4 + i];
 206
 207         tmp13 = temp[2 + i] + temp[6 + i];
 208         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 209
 210         tmp0 = tmp10 + tmp13;
 211         tmp3 = tmp10 - tmp13;
 212         tmp1 = tmp11 + tmp12;
 213         tmp2 = tmp11 - tmp12;
 214
 215         z13 = temp[5 + i] + temp[3 + i];
 216         z10 = temp[5 + i] - temp[3 + i];
 217         z11 = temp[1 + i] + temp[7 + i];
 218         z12 = temp[1 + i] - temp[7 + i];
 219
 220         tmp7  = z11 + z13;
 221         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 222
 223         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 224         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 225         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 226
 227         tmp6 = tmp12 - tmp7;
 228         tmp5 = tmp11 - tmp6;
 229         tmp4 = tmp10 + tmp5;
 230
 231         block[0 + i] = (tmp0 + tmp7) >> 6;
 232         block[7 + i] = (tmp0 - tmp7) >> 6;
 233         block[1 + i] = (tmp1 + tmp6) >> 6;
 234         block[6 + i] = (tmp1 - tmp6) >> 6;
 235         block[2 + i] = (tmp2 + tmp5) >> 6;
 236         block[5 + i] = (tmp2 - tmp5) >> 6;
 237         block[4 + i] = (tmp3 + tmp4) >> 6;
 238         block[3 + i] = (tmp3 - tmp4) >> 6;
 239     }
 240 }
 241
 242 static av_cold void init_vlcs(FourXContext *f)
 243 {
 244     static VLC_TYPE table[2][4][32][2];
 245     int i, j;
 246
 247     for (i = 0; i < 2; i++) {
 248         for (j = 0; j < 4; j++) {
 249             block_type_vlc[i][j].table           = table[i][j];
 250             block_type_vlc[i][j].table_allocated = 32;
 251             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 252                      &block_type_tab[i][j][0][1], 2, 1,
 253                      &block_type_tab[i][j][0][0], 2, 1,
 254                      INIT_VLC_USE_NEW_STATIC);
 255         }
 256     }
 257 }
 258
 259 static void init_mv(FourXContext *f)
 260 {
 261     int i;
 262
 263     for (i = 0; i < 256; i++) {
 264         if (f->version > 1)
 265             f->mv[i] = mv[i][0] + mv[i][1] * f->current_picture.linesize[0] / 2;
 266         else
 267             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * f->current_picture.linesize[0] / 2;
 268     }
 269 }
 270
 271 #if HAVE_BIGENDIAN
 272 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 273     {                                                   \
 274         unsigned tmpval = AV_RN32(src);                 \
 275         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 276         tmpval = tmpval * (scale) + (dc);               \
 277         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 278         AV_WN32A(dst, tmpval);                          \
 279     }
 280 #else
 281 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 282     {                                                    \
 283         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 284         AV_WN32A(dst, tmpval);                           \
 285     }
 286 #endif
 287
 288 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 289                         int h, int stride, int scale, unsigned dc)
 290 {
 291     int i;
 292     dc *= 0x10001;
 293
 294     switch (log2w) {
 295     case 0:
 296         for (i = 0; i < h; i++) {
 297             dst[0] = scale * src[0] + dc;
 298             if (scale)
 299                 src += stride;
 300             dst += stride;
 301         }
 302         break;
 303     case 1:
 304         for (i = 0; i < h; i++) {
 305             LE_CENTRIC_MUL(dst, src, scale, dc);
 306             if (scale)
 307                 src += stride;
 308             dst += stride;
 309         }
 310         break;
 311     case 2:
 312         for (i = 0; i < h; i++) {
 313             LE_CENTRIC_MUL(dst, src, scale, dc);
 314             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 315             if (scale)
 316                 src += stride;
 317             dst += stride;
 318         }
 319         break;
 320     case 3:
 321         for (i = 0; i < h; i++) {
 322             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 323             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 324             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 325             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 326             if (scale)
 327                 src += stride;
 328             dst += stride;
 329         }
 330         break;
 331     default:
 332         assert(0);
 333     }
 334 }
 335
 336 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 337                            int log2w, int log2h, int stride)
 338 {
 339     const int index = size2index[log2h][log2w];
 340     const int h     = 1 << log2h;
 341     int code        = get_vlc2(&f->gb,
 342                                block_type_vlc[1 - (f->version > 1)][index].table,
 343                                BLOCK_TYPE_VLC_BITS, 1);
 344     uint16_t *start = (uint16_t *)f->last_picture.data[0];
 345     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 346
 347     assert(code >= 0 && code <= 6);
 348
 349     if (code == 0) {
 350         src += f->mv[bytestream2_get_byte(&f->g)];
 351         if (start > src || src > end) {
 352             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 353             return;
 354         }
 355         mcdc(dst, src, log2w, h, stride, 1, 0);
 356     } else if (code == 1) {
 357         log2h--;
 358         decode_p_block(f, dst, src, log2w, log2h, stride);
 359         decode_p_block(f, dst + (stride << log2h),
 360                           src + (stride << log2h), log2w, log2h, stride);
 361     } else if (code == 2) {
 362         log2w--;
 363         decode_p_block(f, dst , src, log2w, log2h, stride);
 364         decode_p_block(f, dst + (1 << log2w),
 365                           src + (1 << log2w), log2w, log2h, stride);
 366     } else if (code == 3 && f->version < 2) {
 367         mcdc(dst, src, log2w, h, stride, 1, 0);
 368     } else if (code == 4) {
 369         src += f->mv[bytestream2_get_byte(&f->g)];
 370         if (start > src || src > end) {
 371             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 372             return;
 373         }
 374         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 375     } else if (code == 5) {
 376         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 377     } else if (code == 6) {
 378         if (log2w) {
 379             dst[0]      = bytestream2_get_le16(&f->g2);
 380             dst[1]      = bytestream2_get_le16(&f->g2);
 381         } else {
 382             dst[0]      = bytestream2_get_le16(&f->g2);
 383             dst[stride] = bytestream2_get_le16(&f->g2);
 384         }
 385     }
 386 }
 387
 388 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 389 {
 390     int x, y;
 391     const int width  = f->avctx->width;
 392     const int height = f->avctx->height;
 393     uint16_t *src    = (uint16_t *)f->last_picture.data[0];
 394     uint16_t *dst    = (uint16_t *)f->current_picture.data[0];
 395     const int stride =             f->current_picture.linesize[0] >> 1;
 396     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 397                  bytestream_offset, wordstream_offset;
 398
 399     if (f->version > 1) {
 400         extra           = 20;
 401         bitstream_size  = AV_RL32(buf + 8);
 402         wordstream_size = AV_RL32(buf + 12);
 403         bytestream_size = AV_RL32(buf + 16);
 404     } else {
 405         extra           = 0;
 406         bitstream_size  = AV_RL16(buf - 4);
 407         wordstream_size = AV_RL16(buf - 2);
 408         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 409     }
 410
 411     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 412         || bitstream_size  > (1 << 26)
 413         || bytestream_size > (1 << 26)
 414         || wordstream_size > (1 << 26)) {
 415         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 416                bitstream_size, bytestream_size, wordstream_size,
 417                bitstream_size + bytestream_size + wordstream_size - length);
 418         return -1;
 419     }
 420
 421     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 422                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 423     if (!f->bitstream_buffer)
 424         return AVERROR(ENOMEM);
 425     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 426                      bitstream_size / 4);
 427     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 428            0, FF_INPUT_BUFFER_PADDING_SIZE);
 429     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 430
 431     wordstream_offset = extra + bitstream_size;
 432     bytestream_offset = extra + bitstream_size + wordstream_size;
 433     bytestream2_init(&f->g2, buf + wordstream_offset,
 434                      length - wordstream_offset);
 435     bytestream2_init(&f->g, buf + bytestream_offset,
 436                      length - bytestream_offset);
 437
 438     init_mv(f);
 439
 440     for (y = 0; y < height; y += 8) {
 441         for (x = 0; x < width; x += 8)
 442             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 443         src += 8 * stride;
 444         dst += 8 * stride;
 445     }
 446
 447     return 0;
 448 }
 449
 450 /**
 451  * decode block and dequantize.
 452  * Note this is almost identical to MJPEG.
 453  */
 454 static int decode_i_block(FourXContext *f, DCTELEM *block)
 455 {
 456     int code, i, j, level, val;
 457
 458     /* DC coef */
 459     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 460     if (val >> 4)
 461         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 462
 463     if (val)
 464         val = get_xbits(&f->gb, val);
 465
 466     val        = val * dequant_table[0] + f->last_dc;
 467     f->last_dc = block[0] = val;
 468     /* AC coefs */
 469     i = 1;
 470     for (;;) {
 471         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 472
 473         /* EOB */
 474         if (code == 0)
 475             break;
 476         if (code == 0xf0) {
 477             i += 16;
 478         } else {
 479             level = get_xbits(&f->gb, code & 0xf);
 480             i    += code >> 4;
 481             if (i >= 64) {
 482                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 483                 return 0;
 484             }
 485
 486             j = ff_zigzag_direct[i];
 487             block[j] = level * dequant_table[j];
 488             i++;
 489             if (i >= 64)
 490                 break;
 491         }
 492     }
 493
 494     return 0;
 495 }
 496
 497 static inline void idct_put(FourXContext *f, int x, int y)
 498 {
 499     DCTELEM (*block)[64] = f->block;
 500     int stride           = f->current_picture.linesize[0] >> 1;
 501     int i;
 502     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 503
 504     for (i = 0; i < 4; i++) {
 505         block[i][0] += 0x80 * 8 * 8;
 506         idct(block[i]);
 507     }
 508
 509     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 510         for (i = 4; i < 6; i++)
 511             idct(block[i]);
 512     }
 513
 514     /* Note transform is:
 515      * y  = ( 1b + 4g + 2r) / 14
 516      * cb = ( 3b - 2g - 1r) / 14
 517      * cr = (-1b - 4g + 5r) / 14 */
 518     for (y = 0; y < 8; y++) {
 519         for (x = 0; x < 8; x++) {
 520             DCTELEM *temp = block[(x >> 2) + 2 * (y >> 2)] +
 521                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 522             int cb = block[4][x + 8 * y];
 523             int cr = block[5][x + 8 * y];
 524             int cg = (cb + cr) >> 1;
 525             int y;
 526
 527             cb += cb;
 528
 529             y               = temp[0];
 530             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 531             y               = temp[1];
 532             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 533             y               = temp[8];
 534             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 535             y               = temp[9];
 536             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 537             dst            += 2;
 538         }
 539         dst += 2 * stride - 2 * 8;
 540     }
 541 }
 542
 543 static int decode_i_mb(FourXContext *f)
 544 {
 545     int i;
 546
 547     f->dsp.clear_blocks(f->block[0]);
 548
 549     for (i = 0; i < 6; i++)
 550         if (decode_i_block(f, f->block[i]) < 0)
 551             return -1;
 552
 553     return 0;
 554 }
 555
 556 static const uint8_t *read_huffman_tables(FourXContext *f,
 557                                           const uint8_t * const buf)
 558 {
 559     int frequency[512] = { 0 };
 560     uint8_t flag[512];
 561     int up[512];
 562     uint8_t len_tab[257];
 563     int bits_tab[257];
 564     int start, end;
 565     const uint8_t *ptr = buf;
 566     int j;
 567
 568     memset(up, -1, sizeof(up));
 569
 570     start = *ptr++;
 571     end   = *ptr++;
 572     for (;;) {
 573         int i;
 574
 575         for (i = start; i <= end; i++)
 576             frequency[i] = *ptr++;
 577         start = *ptr++;
 578         if (start == 0)
 579             break;
 580
 581         end = *ptr++;
 582     }
 583     frequency[256] = 1;
 584
 585     while ((ptr - buf) & 3)
 586         ptr++; // 4byte align
 587
 588     for (j = 257; j < 512; j++) {
 589         int min_freq[2] = { 256 * 256, 256 * 256 };
 590         int smallest[2] = { 0, 0 };
 591         int i;
 592         for (i = 0; i < j; i++) {
 593             if (frequency[i] == 0)
 594                 continue;
 595             if (frequency[i] < min_freq[1]) {
 596                 if (frequency[i] < min_freq[0]) {
 597                     min_freq[1] = min_freq[0];
 598                     smallest[1] = smallest[0];
 599                     min_freq[0] = frequency[i];
 600                     smallest[0] = i;
 601                 } else {
 602                     min_freq[1] = frequency[i];
 603                     smallest[1] = i;
 604                 }
 605             }
 606         }
 607         if (min_freq[1] == 256 * 256)
 608             break;
 609
 610         frequency[j]           = min_freq[0] + min_freq[1];
 611         flag[smallest[0]]      = 0;
 612         flag[smallest[1]]      = 1;
 613         up[smallest[0]]        =
 614         up[smallest[1]]        = j;
 615         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 616     }
 617
 618     for (j = 0; j < 257; j++) {
 619         int node, len = 0, bits = 0;
 620
 621         for (node = j; up[node] != -1; node = up[node]) {
 622             bits += flag[node] << len;
 623             len++;
 624             if (len > 31)
 625                 // can this happen at all ?
 626                 av_log(f->avctx, AV_LOG_ERROR,
 627                        "vlc length overflow\n");
 628         }
 629
 630         bits_tab[j] = bits;
 631         len_tab[j]  = len;
 632     }
 633
 634     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 635                  bits_tab, 4, 4, 0))
 636         return NULL;
 637
 638     return ptr;
 639 }
 640
 641 static int mix(int c0, int c1)
 642 {
 643     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 644     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 645     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 646     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 647 }
 648
 649 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 650 {
 651     int x, y, x2, y2;
 652     const int width  = f->avctx->width;
 653     const int height = f->avctx->height;
 654     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 655     uint16_t *dst    = (uint16_t*)f->current_picture.data[0];
 656     const int stride =            f->current_picture.linesize[0]>>1;
 657     GetByteContext g3;
 658
 659     if (length < mbs * 8) {
 660         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 661         return AVERROR_INVALIDDATA;
 662     }
 663     bytestream2_init(&g3, buf, length);
 664
 665     for (y = 0; y < height; y += 16) {
 666         for (x = 0; x < width; x += 16) {
 667             unsigned int color[4] = { 0 }, bits;
 668             // warning following is purely guessed ...
 669             color[0] = bytestream2_get_le16u(&g3);
 670             color[1] = bytestream2_get_le16u(&g3);
 671
 672             if (color[0] & 0x8000)
 673                 av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 674             if (color[1] & 0x8000)
 675                 av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 676
 677             color[2] = mix(color[0], color[1]);
 678             color[3] = mix(color[1], color[0]);
 679
 680             bits = bytestream2_get_le32u(&g3);
 681             for (y2 = 0; y2 < 16; y2++) {
 682                 for (x2 = 0; x2 < 16; x2++) {
 683                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 684                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 685                 }
 686             }
 687             dst += 16;
 688         }
 689         dst += 16 * stride - x;
 690     }
 691
 692     return 0;
 693 }
 694
 695 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 696 {
 697     int x, y;
 698     const int width  = f->avctx->width;
 699     const int height = f->avctx->height;
 700     const unsigned int bitstream_size = AV_RL32(buf);
 701     int token_count av_unused;
 702     unsigned int prestream_size;
 703     const uint8_t *prestream;
 704
 705     if (length < bitstream_size + 12) {
 706         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 707         return AVERROR_INVALIDDATA;
 708     }
 709
 710     token_count    =     AV_RL32(buf + bitstream_size + 8);
 711     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 712     prestream      =             buf + bitstream_size + 12;
 713
 714     if (prestream_size + bitstream_size + 12 != length
 715         || bitstream_size > (1 << 26)
 716         || prestream_size > (1 << 26)) {
 717         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 718                prestream_size, bitstream_size, length);
 719         return -1;
 720     }
 721
 722     prestream = read_huffman_tables(f, prestream);
 723
 724     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 725
 726     prestream_size = length + buf - prestream;
 727
 728     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 729                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 730     if (!f->bitstream_buffer)
 731         return AVERROR(ENOMEM);
 732     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 733                      prestream_size / 4);
 734     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 735            0, FF_INPUT_BUFFER_PADDING_SIZE);
 736     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 737
 738     f->last_dc = 0 * 128 * 8 * 8;
 739
 740     for (y = 0; y < height; y += 16) {
 741         for (x = 0; x < width; x += 16) {
 742             if (decode_i_mb(f) < 0)
 743                 return -1;
 744
 745             idct_put(f, x, y);
 746         }
 747     }
 748
 749     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 750         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 751
 752     return 0;
 753 }
 754
 755 static int decode_frame(AVCodecContext *avctx, void *data,
 756                         int *got_frame, AVPacket *avpkt)
 757 {
 758     const uint8_t *buf    = avpkt->data;
 759     int buf_size          = avpkt->size;
 760     FourXContext *const f = avctx->priv_data;
 761     AVFrame *picture      = data;
 762     AVFrame *p, temp;
 763     int i, frame_4cc, frame_size;
 764
 765     frame_4cc = AV_RL32(buf);
 766     if (buf_size != AV_RL32(buf + 4) + 8 || buf_size < 20)
 767         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 768                buf_size, AV_RL32(buf + 4));
 769
 770     if (frame_4cc == AV_RL32("cfrm")) {
 771         int free_index       = -1;
 772         const int data_size  = buf_size - 20;
 773         const int id         = AV_RL32(buf + 12);
 774         const int whole_size = AV_RL32(buf + 16);
 775         CFrameBuffer *cfrm;
 776
 777         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 778             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 779                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 780                        f->cfrm[i].id);
 781
 782         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 783             if (f->cfrm[i].id == id)
 784                 break;
 785             if (f->cfrm[i].size == 0)
 786                 free_index = i;
 787         }
 788
 789         if (i >= CFRAME_BUFFER_COUNT) {
 790             i             = free_index;
 791             f->cfrm[i].id = id;
 792         }
 793         cfrm = &f->cfrm[i];
 794
 795         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 796                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 797         // explicit check needed as memcpy below might not catch a NULL
 798         if (!cfrm->data) {
 799             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 800             return -1;
 801         }
 802
 803         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 804         cfrm->size += data_size;
 805
 806         if (cfrm->size >= whole_size) {
 807             buf        = cfrm->data;
 808             frame_size = cfrm->size;
 809
 810             if (id != avctx->frame_number)
 811                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 812                        id, avctx->frame_number);
 813
 814             cfrm->size = cfrm->id = 0;
 815             frame_4cc  = AV_RL32("pfrm");
 816         } else
 817             return buf_size;
 818     } else {
 819         buf        = buf      + 12;
 820         frame_size = buf_size - 12;
 821     }
 822
 823     temp               = f->current_picture;
 824     f->current_picture = f->last_picture;
 825     f->last_picture    = temp;
 826
 827     p                  = &f->current_picture;
 828     avctx->coded_frame = p;
 829
 830     // alternatively we would have to use our own buffer management
 831     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 832
 833     if (p->data[0])
 834         avctx->release_buffer(avctx, p);
 835
 836     p->reference = 1;
 837     if (ff_get_buffer(avctx, p) < 0) {
 838         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 839         return -1;
 840     }
 841
 842     if (frame_4cc == AV_RL32("ifr2")) {
 843         p->pict_type = AV_PICTURE_TYPE_I;
 844         if (decode_i2_frame(f, buf - 4, frame_size + 4) < 0)
 845             return -1;
 846     } else if (frame_4cc == AV_RL32("ifrm")) {
 847         p->pict_type = AV_PICTURE_TYPE_I;
 848         if (decode_i_frame(f, buf, frame_size) < 0)
 849             return -1;
 850     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 851         if (!f->last_picture.data[0]) {
 852             f->last_picture.reference = 1;
 853             if (ff_get_buffer(avctx, &f->last_picture) < 0) {
 854                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 855                 return -1;
 856             }
 857             memset(f->last_picture.data[0], 0, avctx->height * FFABS(f->last_picture.linesize[0]));
 858         }
 859
 860         p->pict_type = AV_PICTURE_TYPE_P;
 861         if (decode_p_frame(f, buf, frame_size) < 0)
 862             return -1;
 863     } else if (frame_4cc == AV_RL32("snd_")) {
 864         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 865                buf_size);
 866     } else {
 867         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 868                buf_size);
 869     }
 870
 871     p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
 872
 873     *picture   = *p;
 874     *got_frame = 1;
 875
 876     emms_c();
 877
 878     return buf_size;
 879 }
 880
 881
 882 static av_cold void common_init(AVCodecContext *avctx)
 883 {
 884     FourXContext * const f = avctx->priv_data;
 885
 886     ff_dsputil_init(&f->dsp, avctx);
 887
 888     f->avctx = avctx;
 889 }
 890
 891 static av_cold int decode_init(AVCodecContext *avctx)
 892 {
 893     FourXContext * const f = avctx->priv_data;
 894
 895     if (avctx->extradata_size != 4 || !avctx->extradata) {
 896         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 897         return 1;
 898     }
 899
 900     f->version = AV_RL32(avctx->extradata) >> 16;
 901     common_init(avctx);
 902     init_vlcs(f);
 903
 904     if (f->version > 2)
 905         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 906     else
 907         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 908
 909     return 0;
 910 }
 911
 912
 913 static av_cold int decode_end(AVCodecContext *avctx)
 914 {
 915     FourXContext * const f = avctx->priv_data;
 916     int i;
 917
 918     av_freep(&f->bitstream_buffer);
 919     f->bitstream_buffer_size = 0;
 920     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 921         av_freep(&f->cfrm[i].data);
 922         f->cfrm[i].allocated_size = 0;
 923     }
 924     ff_free_vlc(&f->pre_vlc);
 925     if (f->current_picture.data[0])
 926         avctx->release_buffer(avctx, &f->current_picture);
 927     if (f->last_picture.data[0])
 928         avctx->release_buffer(avctx, &f->last_picture);
 929
 930     return 0;
 931 }
 932
 933 AVCodec ff_fourxm_decoder = {
 934     .name           = "4xm",
 935     .type           = AVMEDIA_TYPE_VIDEO,
 936     .id             = AV_CODEC_ID_4XM,
 937     .priv_data_size = sizeof(FourXContext),
 938     .init           = decode_init,
 939     .close          = decode_end,
 940     .decode         = decode_frame,
 941     .capabilities   = CODEC_CAP_DR1,
 942     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 943 };