git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/frame.h"
  28 #include "libavutil/intreadwrite.h"
  29 #include "avcodec.h"
  30 #include "bytestream.h"
  31 #include "dsputil.h"
  32 #include "get_bits.h"
  33 #include "internal.h"
  34
  35 #define BLOCK_TYPE_VLC_BITS 5
  36 #define ACDC_VLC_BITS 9
  37
  38 #define CFRAME_BUFFER_COUNT 100
  39
  40 static const uint8_t block_type_tab[2][4][8][2] = { // { code, length } pairs; init_vlcs() feeds the first 7 entries of each row to init_vlc()
  41     { // first index 0: set selected by decode_p_block() when f->version > 1
  42         {    // { 8, 4, 2 } x { 8, 4, 2}
  43             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  44         }, { // { 8, 4 } x 1
  45             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  46         }, { // 1 x { 8, 4 }
  47             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  48         }, { // 1 x 2, 2 x 1
  49             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  50         }
  51     }, { // first index 1: set selected when f->version <= 1
  52         {   // { 8, 4, 2 } x { 8, 4, 2}
  53             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  54         }, {// { 8, 4 } x 1
  55             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  56         }, {// 1 x { 8, 4 }
  57             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  58         }, {// 1 x 2, 2 x 1
  59             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  60       }
  61     }
  62 };
  63
  64 static const uint8_t size2index[4][4] = { // [log2h][log2w] -> second index of block_type_vlc (see decode_p_block)
  65     { -1, 3, 1, 1 }, // NOTE(review): -1 is stored as 255 in a uint8_t; the 1x1 case looks unreachable -- confirm
  66     {  3, 0, 0, 0 },
  67     {  2, 0, 0, 0 },
  68     {  2, 0, 0, 0 },
  69 };
  70
  71 static const int8_t mv[256][2] = { // init_mv() uses [0] as-is and scales [1] by linesize / 2 when f->version > 1
  72     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  73     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  74     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  75     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  76     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  77     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  78     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  79     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  80     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  81     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  82     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  83     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  84     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  85     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  86     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  87     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  88     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  89     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  90     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  91     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  92     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  93     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  94     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  95     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  96     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
  97     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
  98     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
  99     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 100     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 101     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 102     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 103     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 104 };
 105
 106 /* This is simply the scaled down elementwise product of the standard JPEG
 107  * quantizer table and the AAN premul table. */
 108 static const uint8_t dequant_table[64] = { // indexed by the de-zigzagged coefficient position (see decode_i_block)
 109     16, 15, 13, 19, 24, 31, 28, 17,
 110     17, 23, 25, 31, 36, 63, 45, 21,
 111     18, 24, 27, 37, 52, 59, 49, 20,
 112     16, 28, 34, 40, 60, 80, 51, 20,
 113     18, 31, 48, 66, 68, 86, 56, 21,
 114     19, 38, 56, 59, 64, 64, 48, 20,
 115     27, 48, 55, 55, 56, 51, 35, 15,
 116     20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
 119 static VLC block_type_vlc[2][4]; // P-frame block type VLCs, filled by init_vlcs() from block_type_tab
 120
 121
 122 typedef struct CFrameBuffer { // reassembly slot for a frame delivered in several "cfrm" chunks
 123     unsigned int allocated_size; // capacity of data, maintained by av_fast_realloc()
 124     unsigned int size;           // bytes accumulated so far; reset to 0 once the frame is complete
 125     int id;                      // chunk id read from the packet; 0 is treated as "slot unused"
 126     uint8_t *data;               // accumulated payload
 127 } CFrameBuffer;
 128
 129 typedef struct FourXContext { // decoder state kept in avctx->priv_data
 130     AVCodecContext *avctx;         ///< owning codec context, used for dimensions, flags and logging
 131     DSPContext dsp;                ///< supplies bswap_buf() and clear_blocks()
 132     AVFrame *last_picture;         ///< previous frame, read as the P-frame prediction source
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;              ///< main bit reader (P-frame block types, I-frame coefficient bits)
 135     GetByteContext g;              ///< P-frame byte stream: indices into mv[]
 136     GetByteContext g2;             ///< P-frame stream of 16-bit little-endian words
 137     int mv[256];                   ///< pixel offsets rebuilt by init_mv() for each P-frame
 138     VLC pre_vlc;                   ///< Huffman table built by read_huffman_tables()
 139     int last_dc;                   ///< running DC value carried across blocks of an I-frame
 140     DECLARE_ALIGNED(16, int16_t, block)[6][64]; ///< one macroblock: blocks 0-3, then 4/5 read as cb/cr by idct_put()
 141     void *bitstream_buffer;        ///< byte-swapped copy of the current bitstream (av_fast_malloc)
 142     unsigned int bitstream_buffer_size; ///< allocated size of bitstream_buffer
 143     int version;                   ///< format version; assigned outside this part of the file
 144     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< reassembly slots for chunked frames
 145 } FourXContext;
 146
 147
 /* IDCT multipliers in 16.16 fixed point, i.e. round(x * 65536). */
 #define FIX_1_082392200  70936
 #define FIX_1_414213562  92682
 #define FIX_1_847759065 121095
 #define FIX_2_613125930 171254

 /*
  * Fixed-point multiply: (var * k) >> 16.
  *
  * The product is formed in unsigned arithmetic so that out-of-range
  * intermediate values (reachable with corrupt coefficient data) wrap around
  * instead of triggering signed-overflow undefined behaviour. For products
  * that fit in an int the result is the same as a plain signed multiply.
  */
 #define MULTIPLY(var, k) ((int)((var) * (unsigned)(k)) >> 16)
 154
 155 static void idct(int16_t block[64])
 156 {
 157     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 158     int tmp10, tmp11, tmp12, tmp13;
 159     int z5, z10, z11, z12, z13;
 160     int i;
 161     int temp[64];
 162
 163     for (i = 0; i < 8; i++) {
 164         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 165         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 166
 167         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 168         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 169
 170         tmp0 = tmp10 + tmp13;
 171         tmp3 = tmp10 - tmp13;
 172         tmp1 = tmp11 + tmp12;
 173         tmp2 = tmp11 - tmp12;
 174
 175         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 176         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 177         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 178         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 179
 180         tmp7  =          z11 + z13;
 181         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 182
 183         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 184         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 185         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 186
 187         tmp6 = tmp12 - tmp7;
 188         tmp5 = tmp11 - tmp6;
 189         tmp4 = tmp10 + tmp5;
 190
 191         temp[8 * 0 + i] = tmp0 + tmp7;
 192         temp[8 * 7 + i] = tmp0 - tmp7;
 193         temp[8 * 1 + i] = tmp1 + tmp6;
 194         temp[8 * 6 + i] = tmp1 - tmp6;
 195         temp[8 * 2 + i] = tmp2 + tmp5;
 196         temp[8 * 5 + i] = tmp2 - tmp5;
 197         temp[8 * 4 + i] = tmp3 + tmp4;
 198         temp[8 * 3 + i] = tmp3 - tmp4;
 199     }
 200
 201     for (i = 0; i < 8 * 8; i += 8) {
 202         tmp10 = temp[0 + i] + temp[4 + i];
 203         tmp11 = temp[0 + i] - temp[4 + i];
 204
 205         tmp13 = temp[2 + i] + temp[6 + i];
 206         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 207
 208         tmp0 = tmp10 + tmp13;
 209         tmp3 = tmp10 - tmp13;
 210         tmp1 = tmp11 + tmp12;
 211         tmp2 = tmp11 - tmp12;
 212
 213         z13 = temp[5 + i] + temp[3 + i];
 214         z10 = temp[5 + i] - temp[3 + i];
 215         z11 = temp[1 + i] + temp[7 + i];
 216         z12 = temp[1 + i] - temp[7 + i];
 217
 218         tmp7  = z11 + z13;
 219         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 220
 221         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 222         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 223         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 224
 225         tmp6 = tmp12 - tmp7;
 226         tmp5 = tmp11 - tmp6;
 227         tmp4 = tmp10 + tmp5;
 228
 229         block[0 + i] = (tmp0 + tmp7) >> 6;
 230         block[7 + i] = (tmp0 - tmp7) >> 6;
 231         block[1 + i] = (tmp1 + tmp6) >> 6;
 232         block[6 + i] = (tmp1 - tmp6) >> 6;
 233         block[2 + i] = (tmp2 + tmp5) >> 6;
 234         block[5 + i] = (tmp2 - tmp5) >> 6;
 235         block[4 + i] = (tmp3 + tmp4) >> 6;
 236         block[3 + i] = (tmp3 - tmp4) >> 6;
 237     }
 238 }
 239
 240 static av_cold void init_vlcs(FourXContext *f)
 241 {
 242     static VLC_TYPE table[2][4][32][2];
 243     int i, j;
 244
 245     for (i = 0; i < 2; i++) {
 246         for (j = 0; j < 4; j++) {
 247             block_type_vlc[i][j].table           = table[i][j];
 248             block_type_vlc[i][j].table_allocated = 32;
 249             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 250                      &block_type_tab[i][j][0][1], 2, 1,
 251                      &block_type_tab[i][j][0][0], 2, 1,
 252                      INIT_VLC_USE_NEW_STATIC);
 253         }
 254     }
 255 }
 256
 257 static void init_mv(FourXContext *f, int linesize)
 258 {
 259     int i;
 260
 261     for (i = 0; i < 256; i++) {
 262         if (f->version > 1)
 263             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 264         else
 265             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 266     }
 267 }
 268
 269 #if HAVE_BIGENDIAN /* big-endian: swap the 16-bit halves before and after the 32-bit multiply-add */
 270 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 271     {                                                   \
 272         unsigned tmpval = AV_RN32(src);                 \
 273         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 274         tmpval = tmpval * (scale) + (dc);               \
 275         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 276         AV_WN32A(dst, tmpval);                          \
 277     }
 278 #else /* otherwise: one 32-bit multiply-add over the word read from src, stored to dst */
 279 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 280     {                                                    \
 281         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 282         AV_WN32A(dst, tmpval);                           \
 283     }
 284 #endif
 285
 /**
  * Copy a block of 16-bit pixels from src to dst, scaling and adding a DC.
  *
  * Every output pixel is scale * src + dc. With scale == 0 the source pointer
  * is not advanced between rows.
  *
  * @param dst    top-left destination pixel
  * @param src    top-left source pixel
  * @param log2w  log2 of the block width, 0..3
  * @param h      block height in rows
  * @param stride distance between rows, in pixels
  * @param scale  multiplier applied to the source pixels
  * @param dc     16-bit value added to every pixel
  */
 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
                         int h, int stride, int scale, unsigned dc)
 {
     int row;

     /* replicate dc into both 16-bit halves for the two-pixel macro */
     dc *= 0x10001;

     if (log2w < 0 || log2w > 3) {
         assert(0);
         return;
     }

     for (row = 0; row < h; row++) {
         if (log2w == 0) {
             dst[0] = scale * src[0] + dc;
         } else {
             /* widths 2, 4 and 8 are handled two pixels at a time */
             LE_CENTRIC_MUL(dst, src, scale, dc);
             if (log2w >= 2) {
                 LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
             }
             if (log2w == 3) {
                 LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
                 LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
             }
         }

         if (scale)
             src += stride;
         dst += stride;
     }
 }
 333
 334 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 335                            int log2w, int log2h, int stride)
 336 {
 337     const int index = size2index[log2h][log2w];
 338     const int h     = 1 << log2h;
 339     int code        = get_vlc2(&f->gb,
 340                                block_type_vlc[1 - (f->version > 1)][index].table,
 341                                BLOCK_TYPE_VLC_BITS, 1);
 342     uint16_t *start = (uint16_t *)f->last_picture->data[0];
 343     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 344
 345     assert(code >= 0 && code <= 6);
 346
 347     if (code == 0) {
 348         src += f->mv[bytestream2_get_byte(&f->g)];
 349         if (start > src || src > end) {
 350             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 351             return;
 352         }
 353         mcdc(dst, src, log2w, h, stride, 1, 0);
 354     } else if (code == 1) {
 355         log2h--;
 356         decode_p_block(f, dst, src, log2w, log2h, stride);
 357         decode_p_block(f, dst + (stride << log2h),
 358                           src + (stride << log2h), log2w, log2h, stride);
 359     } else if (code == 2) {
 360         log2w--;
 361         decode_p_block(f, dst , src, log2w, log2h, stride);
 362         decode_p_block(f, dst + (1 << log2w),
 363                           src + (1 << log2w), log2w, log2h, stride);
 364     } else if (code == 3 && f->version < 2) {
 365         mcdc(dst, src, log2w, h, stride, 1, 0);
 366     } else if (code == 4) {
 367         src += f->mv[bytestream2_get_byte(&f->g)];
 368         if (start > src || src > end) {
 369             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 370             return;
 371         }
 372         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 373     } else if (code == 5) {
 374         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 375     } else if (code == 6) {
 376         if (log2w) {
 377             dst[0]      = bytestream2_get_le16(&f->g2);
 378             dst[1]      = bytestream2_get_le16(&f->g2);
 379         } else {
 380             dst[0]      = bytestream2_get_le16(&f->g2);
 381             dst[stride] = bytestream2_get_le16(&f->g2);
 382         }
 383     }
 384 }
 385
 386 static int decode_p_frame(FourXContext *f, AVFrame *frame,
 387                           const uint8_t *buf, int length)
 388 {
 389     int x, y;
 390     const int width  = f->avctx->width;
 391     const int height = f->avctx->height;
 392     uint16_t *src    = (uint16_t *)f->last_picture->data[0];
 393     uint16_t *dst    = (uint16_t *)frame->data[0];
 394     const int stride =             frame->linesize[0] >> 1;
 395     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 396                  bytestream_offset, wordstream_offset;
 397
 398     if (f->version > 1) {
 399         extra           = 20;
 400         bitstream_size  = AV_RL32(buf + 8);
 401         wordstream_size = AV_RL32(buf + 12);
 402         bytestream_size = AV_RL32(buf + 16);
 403     } else {
 404         extra           = 0;
 405         bitstream_size  = AV_RL16(buf - 4);
 406         wordstream_size = AV_RL16(buf - 2);
 407         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 408     }
 409
 410     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 411         || bitstream_size  > (1 << 26)
 412         || bytestream_size > (1 << 26)
 413         || wordstream_size > (1 << 26)) {
 414         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 415                bitstream_size, bytestream_size, wordstream_size,
 416                bitstream_size + bytestream_size + wordstream_size - length);
 417         return AVERROR_INVALIDDATA;
 418     }
 419
 420     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 421                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 422     if (!f->bitstream_buffer)
 423         return AVERROR(ENOMEM);
 424     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 425                      bitstream_size / 4);
 426     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 427            0, FF_INPUT_BUFFER_PADDING_SIZE);
 428     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 429
 430     wordstream_offset = extra + bitstream_size;
 431     bytestream_offset = extra + bitstream_size + wordstream_size;
 432     bytestream2_init(&f->g2, buf + wordstream_offset,
 433                      length - wordstream_offset);
 434     bytestream2_init(&f->g, buf + bytestream_offset,
 435                      length - bytestream_offset);
 436
 437     init_mv(f, frame->linesize[0]);
 438
 439     for (y = 0; y < height; y += 8) {
 440         for (x = 0; x < width; x += 8)
 441             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 442         src += 8 * stride;
 443         dst += 8 * stride;
 444     }
 445
 446     return 0;
 447 }
 448
 449 /**
 450  * decode block and dequantize.
 451  * Note this is almost identical to MJPEG.
 452  */
 453 static int decode_i_block(FourXContext *f, int16_t *block)
 454 {
 455     int code, i, j, level, val;
 456
 457     /* DC coef */
 458     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 459     if (val >> 4)
 460         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 461
 462     if (val)
 463         val = get_xbits(&f->gb, val);
 464
 465     val        = val * dequant_table[0] + f->last_dc;
 466     f->last_dc = block[0] = val;
 467     /* AC coefs */
 468     i = 1;
 469     for (;;) {
 470         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 471
 472         /* EOB */
 473         if (code == 0)
 474             break;
 475         if (code == 0xf0) {
 476             i += 16;
 477         } else {
 478             level = get_xbits(&f->gb, code & 0xf);
 479             i    += code >> 4;
 480             if (i >= 64) {
 481                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 482                 return 0;
 483             }
 484
 485             j = ff_zigzag_direct[i];
 486             block[j] = level * dequant_table[j];
 487             i++;
 488             if (i >= 64)
 489                 break;
 490         }
 491     }
 492
 493     return 0;
 494 }
 495
 496 static inline void idct_put(FourXContext *f, AVFrame *frame, int x, int y)
 497 {
 498     int16_t (*block)[64] = f->block;
 499     int stride           = frame->linesize[0] >> 1;
 500     int i;
 501     uint16_t *dst = ((uint16_t*)frame->data[0]) + y * stride + x;
 502
 503     for (i = 0; i < 4; i++) {
 504         block[i][0] += 0x80 * 8 * 8;
 505         idct(block[i]);
 506     }
 507
 508     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 509         for (i = 4; i < 6; i++)
 510             idct(block[i]);
 511     }
 512
 513     /* Note transform is:
 514      * y  = ( 1b + 4g + 2r) / 14
 515      * cb = ( 3b - 2g - 1r) / 14
 516      * cr = (-1b - 4g + 5r) / 14 */
 517     for (y = 0; y < 8; y++) {
 518         for (x = 0; x < 8; x++) {
 519             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 520                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 521             int cb = block[4][x + 8 * y];
 522             int cr = block[5][x + 8 * y];
 523             int cg = (cb + cr) >> 1;
 524             int y;
 525
 526             cb += cb;
 527
 528             y               = temp[0];
 529             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 530             y               = temp[1];
 531             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 532             y               = temp[8];
 533             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 534             y               = temp[9];
 535             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 536             dst            += 2;
 537         }
 538         dst += 2 * stride - 2 * 8;
 539     }
 540 }
 541
 542 static int decode_i_mb(FourXContext *f)
 543 {
 544     int ret;
 545     int i;
 546
 547     f->dsp.clear_blocks(f->block[0]);
 548
 549     for (i = 0; i < 6; i++)
 550         if ((ret = decode_i_block(f, f->block[i])) < 0)
 551             return ret;
 552
 553     return 0;
 554 }
 555
 556 static const uint8_t *read_huffman_tables(FourXContext *f,
 557                                           const uint8_t * const buf)
 558 {
 559     int frequency[512] = { 0 };
 560     uint8_t flag[512];
 561     int up[512];
 562     uint8_t len_tab[257];
 563     int bits_tab[257];
 564     int start, end;
 565     const uint8_t *ptr = buf;
 566     int j;
 567
 568     memset(up, -1, sizeof(up));
 569
 570     start = *ptr++;
 571     end   = *ptr++;
 572     for (;;) {
 573         int i;
 574
 575         for (i = start; i <= end; i++)
 576             frequency[i] = *ptr++;
 577         start = *ptr++;
 578         if (start == 0)
 579             break;
 580
 581         end = *ptr++;
 582     }
 583     frequency[256] = 1;
 584
 585     while ((ptr - buf) & 3)
 586         ptr++; // 4byte align
 587
 588     for (j = 257; j < 512; j++) {
 589         int min_freq[2] = { 256 * 256, 256 * 256 };
 590         int smallest[2] = { 0, 0 };
 591         int i;
 592         for (i = 0; i < j; i++) {
 593             if (frequency[i] == 0)
 594                 continue;
 595             if (frequency[i] < min_freq[1]) {
 596                 if (frequency[i] < min_freq[0]) {
 597                     min_freq[1] = min_freq[0];
 598                     smallest[1] = smallest[0];
 599                     min_freq[0] = frequency[i];
 600                     smallest[0] = i;
 601                 } else {
 602                     min_freq[1] = frequency[i];
 603                     smallest[1] = i;
 604                 }
 605             }
 606         }
 607         if (min_freq[1] == 256 * 256)
 608             break;
 609
 610         frequency[j]           = min_freq[0] + min_freq[1];
 611         flag[smallest[0]]      = 0;
 612         flag[smallest[1]]      = 1;
 613         up[smallest[0]]        =
 614         up[smallest[1]]        = j;
 615         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 616     }
 617
 618     for (j = 0; j < 257; j++) {
 619         int node, len = 0, bits = 0;
 620
 621         for (node = j; up[node] != -1; node = up[node]) {
 622             bits += flag[node] << len;
 623             len++;
 624             if (len > 31)
 625                 // can this happen at all ?
 626                 av_log(f->avctx, AV_LOG_ERROR,
 627                        "vlc length overflow\n");
 628         }
 629
 630         bits_tab[j] = bits;
 631         len_tab[j]  = len;
 632     }
 633
 634     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 635                  bits_tab, 4, 4, 0))
 636         return NULL;
 637
 638     return ptr;
 639 }
 640
 /**
  * Blend two 15-bit colours (5 bits per channel) with weights 2:1.
  *
  * Every channel is computed as (2 * c0 + c1) / 3 using integer division.
  *
  * @param c0 colour weighted twice
  * @param c1 colour weighted once
  * @return the blended colour in the same 5-5-5 layout
  */
 static int mix(int c0, int c1)
 {
     const int low_sum  = 2 * (c0 & 0x001F) + (c1 & 0x001F);
     const int mid_sum  = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
     const int high_sum = 2 * (c0 >> 10) + (c1 >> 10);

     const int low  = low_sum / 3;
     const int mid  = mid_sum / 3;
     const int high = high_sum / 3;

     return high * 1024 + mid * 32 + low;
 }
 648
 649 static int decode_i2_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 650 {
 651     int x, y, x2, y2;
 652     const int width  = f->avctx->width;
 653     const int height = f->avctx->height;
 654     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 655     uint16_t *dst    = (uint16_t*)frame->data[0];
 656     const int stride =            frame->linesize[0]>>1;
 657     GetByteContext g3;
 658
 659     if (length < mbs * 8) {
 660         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 661         return AVERROR_INVALIDDATA;
 662     }
 663     bytestream2_init(&g3, buf, length);
 664
 665     for (y = 0; y < height; y += 16) {
 666         for (x = 0; x < width; x += 16) {
 667             unsigned int color[4] = { 0 }, bits;
 668             // warning following is purely guessed ...
 669             color[0] = bytestream2_get_le16u(&g3);
 670             color[1] = bytestream2_get_le16u(&g3);
 671
 672             if (color[0] & 0x8000)
 673                 av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 674             if (color[1] & 0x8000)
 675                 av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 676
 677             color[2] = mix(color[0], color[1]);
 678             color[3] = mix(color[1], color[0]);
 679
 680             bits = bytestream2_get_le32u(&g3);
 681             for (y2 = 0; y2 < 16; y2++) {
 682                 for (x2 = 0; x2 < 16; x2++) {
 683                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 684                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 685                 }
 686             }
 687             dst += 16;
 688         }
 689         dst += 16 * stride - x;
 690     }
 691
 692     return 0;
 693 }
 694
 695 static int decode_i_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 696 {
 697     int x, y, ret;
 698     const int width  = f->avctx->width;
 699     const int height = f->avctx->height;
 700     const unsigned int bitstream_size = AV_RL32(buf);
 701     int token_count av_unused;
 702     unsigned int prestream_size;
 703     const uint8_t *prestream;
 704
 705     if (length < bitstream_size + 12) {
 706         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 707         return AVERROR_INVALIDDATA;
 708     }
 709
 710     token_count    =     AV_RL32(buf + bitstream_size + 8);
 711     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 712     prestream      =             buf + bitstream_size + 12;
 713
 714     if (prestream_size + bitstream_size + 12 != length
 715         || bitstream_size > (1 << 26)
 716         || prestream_size > (1 << 26)) {
 717         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 718                prestream_size, bitstream_size, length);
 719         return AVERROR_INVALIDDATA;
 720     }
 721
 722     prestream = read_huffman_tables(f, prestream);
 723     if (!prestream) {
 724         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 725         return AVERROR_INVALIDDATA;
 726     }
 727
 728     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 729
 730     prestream_size = length + buf - prestream;
 731
 732     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 733                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 734     if (!f->bitstream_buffer)
 735         return AVERROR(ENOMEM);
 736     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 737                      prestream_size / 4);
 738     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 739            0, FF_INPUT_BUFFER_PADDING_SIZE);
 740     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 741
 742     f->last_dc = 0 * 128 * 8 * 8;
 743
 744     for (y = 0; y < height; y += 16) {
 745         for (x = 0; x < width; x += 16) {
 746             if ((ret = decode_i_mb(f)) < 0)
 747                 return ret;
 748
 749             idct_put(f, frame, x, y);
 750         }
 751     }
 752
 753     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 754         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 755
 756     return 0;
 757 }
 758
 759 static int decode_frame(AVCodecContext *avctx, void *data,
 760                         int *got_frame, AVPacket *avpkt)
 761 {
 762     const uint8_t *buf    = avpkt->data;
 763     int buf_size          = avpkt->size;
 764     FourXContext *const f = avctx->priv_data;
 765     AVFrame *picture      = data;
 766     int i, frame_4cc, frame_size, ret;
 767
 768     frame_4cc = AV_RL32(buf);
 769     if (buf_size != AV_RL32(buf + 4) + 8 || buf_size < 20)
 770         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 771                buf_size, AV_RL32(buf + 4));
 772
 773     if (frame_4cc == AV_RL32("cfrm")) {
 774         int free_index       = -1;
 775         const int data_size  = buf_size - 20;
 776         const int id         = AV_RL32(buf + 12);
 777         const int whole_size = AV_RL32(buf + 16);
 778         CFrameBuffer *cfrm;
 779
 780         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 781             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 782                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 783                        f->cfrm[i].id);
 784
 785         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 786             if (f->cfrm[i].id == id)
 787                 break;
 788             if (f->cfrm[i].size == 0)
 789                 free_index = i;
 790         }
 791
 792         if (i >= CFRAME_BUFFER_COUNT) {
 793             i             = free_index;
 794             f->cfrm[i].id = id;
 795         }
 796         cfrm = &f->cfrm[i];
 797
 798         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 799                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 800         // explicit check needed as memcpy below might not catch a NULL
 801         if (!cfrm->data) {
 802             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 803             return AVERROR(ENOMEM);
 804         }
 805
 806         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 807         cfrm->size += data_size;
 808
 809         if (cfrm->size >= whole_size) {
 810             buf        = cfrm->data;
 811             frame_size = cfrm->size;
 812
 813             if (id != avctx->frame_number)
 814                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 815                        id, avctx->frame_number);
 816
 817             cfrm->size = cfrm->id = 0;
 818             frame_4cc  = AV_RL32("pfrm");
 819         } else
 820             return buf_size;
 821     } else {
 822         buf        = buf      + 12;
 823         frame_size = buf_size - 12;
 824     }
 825
 826     // alternatively we would have to use our own buffer management
 827     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 828
 829     if ((ret = ff_get_buffer(avctx, picture, AV_GET_BUFFER_FLAG_REF)) < 0) {
 830         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 831         return ret;
 832     }
 833
 834     if (frame_4cc == AV_RL32("ifr2")) {
 835         picture->pict_type = AV_PICTURE_TYPE_I;
 836         if ((ret = decode_i2_frame(f, picture, buf - 4, frame_size + 4)) < 0)
 837             return ret;
 838     } else if (frame_4cc == AV_RL32("ifrm")) {
 839         picture->pict_type = AV_PICTURE_TYPE_I;
 840         if ((ret = decode_i_frame(f, picture, buf, frame_size)) < 0)
 841             return ret;
 842     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 843         if (!f->last_picture->data[0]) {
 844             if ((ret = ff_get_buffer(avctx, f->last_picture,
 845                                      AV_GET_BUFFER_FLAG_REF)) < 0) {
 846                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 847                 return ret;
 848             }
 849             memset(f->last_picture->data[0], 0, avctx->height * FFABS(f->last_picture->linesize[0]));
 850         }
 851
 852         picture->pict_type = AV_PICTURE_TYPE_P;
 853         if ((ret = decode_p_frame(f, picture, buf, frame_size)) < 0)
 854             return ret;
 855     } else if (frame_4cc == AV_RL32("snd_")) {
 856         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 857                buf_size);
 858     } else {
 859         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 860                buf_size);
 861     }
 862
 863     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 864
 865     av_frame_unref(f->last_picture);
 866     if ((ret = av_frame_ref(f->last_picture, picture)) < 0)
 867         return ret;
 868     *got_frame = 1;
 869
 870     emms_c();
 871
 872     return buf_size;
 873 }
 874
 875 static av_cold int decode_init(AVCodecContext *avctx)
 876 {
 877     FourXContext * const f = avctx->priv_data;
 878
 879     if (avctx->extradata_size != 4 || !avctx->extradata) {
 880         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 881         return 1;
 882     }
 883
 884     f->version = AV_RL32(avctx->extradata) >> 16;
 885     ff_dsputil_init(&f->dsp, avctx);
 886     f->avctx = avctx;
 887     init_vlcs(f);
 888
 889     if (f->version > 2)
 890         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 891     else
 892         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 893
 894     f->last_picture = av_frame_alloc();
 895     if (!f->last_picture)
 896         return AVERROR(ENOMEM);
 897
 898     return 0;
 899 }
 900
 901
 902 static av_cold int decode_end(AVCodecContext *avctx)
 903 {
 904     FourXContext * const f = avctx->priv_data;
 905     int i;
 906
 907     av_freep(&f->bitstream_buffer);
 908     f->bitstream_buffer_size = 0;
 909     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 910         av_freep(&f->cfrm[i].data);
 911         f->cfrm[i].allocated_size = 0;
 912     }
 913     ff_free_vlc(&f->pre_vlc);
 914     av_frame_free(&f->last_picture);
 915
 916     return 0;
 917 }
 918
 919 AVCodec ff_fourxm_decoder = {
 920     .name           = "4xm",
 921     .type           = AVMEDIA_TYPE_VIDEO,
 922     .id             = AV_CODEC_ID_4XM,
 923     .priv_data_size = sizeof(FourXContext),
 924     .init           = decode_init,
 925     .close          = decode_end,
 926     .decode         = decode_frame,
 927     .capabilities   = CODEC_CAP_DR1,
 928     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 929 };