git.sesse.net Git - ffmpeg/blob - libavcodec/speedhq.c

   1 /*
   2  * NewTek SpeedHQ codec
   3  * Copyright 2017 Steinar H. Gunderson
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * NewTek SpeedHQ decoder.
  25  */
  26
  27 #define BITSTREAM_READER_LE
  28
  29 #include "config.h"
  30 #include "libavutil/attributes.h"
  31 #include "libavutil/mem_internal.h"
  32
  33 #include "avcodec.h"
  34 #include "get_bits.h"
  35 #include "internal.h"
  36 #include "libavutil/thread.h"
  37 #include "mathops.h"
  38 #include "mpeg12.h"
  39 #include "mpeg12data.h"
  40 #include "mpeg12vlc.h"
  41
  /*
   * Highest valid coefficient position inside one 8x8 block;
   * decode_dct_block() rejects any run that would move past it.
   */
  #define MAX_INDEX (64 - 1)

  /*
   * 5 bits makes for very small tables, with no more than two lookups needed
   * for the longest (10-bit) codes.
   */
  #define ALPHA_VLC_BITS 5
  49
  50 typedef struct SHQContext {
  51     AVCodecContext *avctx;
  52     BlockDSPContext bdsp;
  53     IDCTDSPContext idsp;
  54     ScanTable intra_scantable;
  55     int quant_matrix[64];
  56     enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
  57         subsampling;
  58     enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
  59 } SHQContext;
  60
  61
  /*
   * AC codes: Very similar but not identical to MPEG-2.
   *
   * Each entry is { code, length in bits }. The first 121 entries pair up
   * index-for-index with speedhq_run[] / speedhq_level[]; the final two
   * entries are the escape and end-of-block codes. The table is turned into
   * a VLC with INIT_VLC_LE in speedhq_static_init().
   */
  static const uint16_t speedhq_vlc[123][2] = {
      {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5},
      {0x0017,  5}, {0x0028,  6}, {0x0008,  6}, {0x006F,  7},
      {0x001F,  7}, {0x00C4,  8}, {0x0044,  8}, {0x005F,  8},
      {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8}, {0x3E00, 14},
      {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
      {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14},
      {0x1A00, 14}, {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14},
      {0x1200, 14}, {0x2200, 14}, {0x0200, 14}, {0x0C00, 15},
      {0x7400, 15}, {0x3400, 15}, {0x5400, 15}, {0x1400, 15},
      {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},
      {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8},
      {0x0004,  8}, {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15},
      {0x3C00, 15}, {0x5C00, 15}, {0x1C00, 15}, {0x6C00, 15},
      {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16}, {0x4800, 16},
      {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},
      {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10},
      {0x0500, 13}, {0x0180, 12}, {0x0280, 12}, {0x0C80, 12},
      {0x0080, 12}, {0x0B00, 13}, {0x1300, 13}, {0x001C,  5},
      {0x0064,  8}, {0x0380, 12}, {0x1900, 13}, {0x0D80, 12},
      {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},
      {0x0038,  6}, {0x0040,  9}, {0x0900, 13}, {0x0030,  7},
      {0x0780, 12}, {0x2800, 16}, {0x0010,  7}, {0x0A80, 12},
      {0x0050,  7}, {0x0880, 12}, {0x000F,  7}, {0x1100, 13},
      {0x002F,  7}, {0x0100, 13}, {0x0084,  8}, {0x5800, 16},
      {0x00A4,  8}, {0x9800, 16}, {0x0024,  8}, {0x1800, 16},
      {0x0140,  9}, {0xE800, 16}, {0x01C0,  9}, {0x6800, 16},
      {0x02C0, 10}, {0xA800, 16}, {0x0F80, 12}, {0x0580, 12},
      {0x0980, 12}, {0x0E80, 12}, {0x0680, 12}, {0x1F00, 13},
      {0x0F00, 13}, {0x1700, 13}, {0x0700, 13}, {0x1B00, 13},
      {0xF800, 16}, {0x7800, 16}, {0xB800, 16}, {0x3800, 16},
      {0xD800, 16},
      {0x0020,  6}, /* escape */
      {0x0006,  4}  /* EOB */
  };
  98
  /*
   * Coefficient magnitude for each of the 121 run/level codes, in the same
   * order as speedhq_vlc[]. The sign is not part of the code:
   * decode_dct_block() reads it as one extra bit after the VLC.
   */
  static const uint8_t speedhq_level[121] = {
       1,  2,  3,  4,  5,  6,  7,  8,
       9, 10, 11, 12, 13, 14, 15, 16,
      17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32,
      33, 34, 35, 36, 37, 38, 39, 40,
       1,  2,  3,  4,  5,  6,  7,  8,
       9, 10, 11, 12, 13, 14, 15, 16,
      17, 18, 19, 20,  1,  2,  3,  4,
       5,  6,  7,  8,  9, 10, 11,  1,
       2,  3,  4,  5,  1,  2,  3,  4,
       1,  2,  3,  1,  2,  3,  1,  2,
       1,  2,  1,  2,  1,  2,  1,  2,
       1,  2,  1,  2,  1,  2,  1,  2,
       1,  2,  1,  1,  1,  1,  1,  1,
       1,  1,  1,  1,  1,  1,  1,  1,
       1,
  };
 117
 /*
  * Zero-run length for each of the 121 run/level codes, in the same order
  * as speedhq_vlc[] and speedhq_level[]. Runs range from 0 to 31; longer
  * runs can only be expressed through the escape code.
  */
 static const uint8_t speedhq_run[121] = {
      0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  2,  2,  2,  2,
      2,  2,  2,  2,  2,  2,  2,  3,
      3,  3,  3,  3,  4,  4,  4,  4,
      5,  5,  5,  6,  6,  6,  7,  7,
      8,  8,  9,  9, 10, 10, 11, 11,
     12, 12, 13, 13, 14, 14, 15, 15,
     16, 16, 17, 18, 19, 20, 21, 22,
     23, 24, 25, 26, 27, 28, 29, 30,
     31,
 };
 136
 137 RLTable ff_rl_speedhq = {
 138     121,
 139     121,
 140     speedhq_vlc,
 141     speedhq_run,
 142     speedhq_level,
 143 };
 144
 145 #if CONFIG_SPEEDHQ_DECODER
 /*
  * Base quantization matrix; compute_quant_matrix() reorders it through
  * ff_zigzag_direct[] and multiplies every entry by the per-frame scale.
  *
  * NOTE: The first element is always 16, unscaled. (decode_dct_block()
  * stores the DC value directly and never multiplies it by the matrix.)
  */
 static const uint8_t unscaled_quant_matrix[64] = {
     16, 16, 19, 22, 26, 27, 29, 34,
     16, 16, 22, 24, 27, 29, 34, 37,
     19, 22, 26, 27, 29, 34, 34, 38,
     22, 22, 26, 27, 29, 34, 37, 40,
     22, 26, 27, 29, 32, 35, 40, 48,
     26, 27, 29, 32, 35, 40, 48, 58,
     26, 27, 29, 34, 38, 46, 56, 69,
     27, 29, 35, 38, 46, 56, 69, 83
 };
 157
 /* Backing storage handed to ff_rl_init() for ff_rl_speedhq. */
 static uint8_t speedhq_static_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];

 /*
  * Little-endian VLCs shared by all decoder instances; they are built once
  * by speedhq_static_init() / compute_alpha_vlcs().
  */
 static VLC dc_lum_vlc_le;          /* DC size, components 0 and 3 */
 static VLC dc_chroma_vlc_le;       /* DC size, components 1 and 2 */
 static VLC dc_alpha_run_vlc_le;    /* RLE alpha: run (or -1 for EOB) */
 static VLC dc_alpha_level_vlc_le;  /* RLE alpha: level */
 164
 165 static inline int decode_dc_le(GetBitContext *gb, int component)
 166 {
 167     int code, diff;
 168
 169     if (component == 0 || component == 3) {
 170         code = get_vlc2(gb, dc_lum_vlc_le.table, DC_VLC_BITS, 2);
 171     } else {
 172         code = get_vlc2(gb, dc_chroma_vlc_le.table, DC_VLC_BITS, 2);
 173     }
 174     if (!code) {
 175         diff = 0;
 176     } else {
 177         diff = get_xbits_le(gb, code);
 178     }
 179     return diff;
 180 }
 181
 182 static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
 183 {
 184     uint8_t block[128];
 185     int i = 0, x, y;
 186
 187     memset(block, 0, sizeof(block));
 188
 189     {
 190         OPEN_READER(re, gb);
 191
 192         for ( ;; ) {
 193             int run, level;
 194
 195             UPDATE_CACHE_LE(re, gb);
 196             GET_VLC(run, re, gb, dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);
 197
 198             if (run < 0) break;
 199             i += run;
 200             if (i >= 128)
 201                 return AVERROR_INVALIDDATA;
 202
 203             UPDATE_CACHE_LE(re, gb);
 204             GET_VLC(level, re, gb, dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
 205             block[i++] = level;
 206         }
 207
 208         CLOSE_READER(re, gb);
 209     }
 210
 211     for (y = 0; y < 8; y++) {
 212         for (x = 0; x < 16; x++) {
 213             last_alpha[x] -= block[y * 16 + x];
 214         }
 215         memcpy(dest, last_alpha, 16);
 216         dest += linesize;
 217     }
 218
 219     return 0;
 220 }
 221
 222 static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
 223 {
 224     const int *quant_matrix = s->quant_matrix;
 225     const uint8_t *scantable = s->intra_scantable.permutated;
 226     LOCAL_ALIGNED_32(int16_t, block, [64]);
 227     int dc_offset;
 228
 229     s->bdsp.clear_block(block);
 230
 231     dc_offset = decode_dc_le(gb, component);
 232     last_dc[component] -= dc_offset;  /* Note: Opposite of most codecs. */
 233     block[scantable[0]] = last_dc[component];  /* quant_matrix[0] is always 16. */
 234
 235     /* Read AC coefficients. */
 236     {
 237         int i = 0;
 238         OPEN_READER(re, gb);
 239         for ( ;; ) {
 240             int level, run;
 241             UPDATE_CACHE_LE(re, gb);
 242             GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
 243                        TEX_VLC_BITS, 2, 0);
 244             if (level == 127) {
 245                 break;
 246             } else if (level) {
 247                 i += run;
 248                 if (i > MAX_INDEX)
 249                     return AVERROR_INVALIDDATA;
 250                 /* If next bit is 1, level = -level */
 251                 level = (level ^ SHOW_SBITS(re, gb, 1)) -
 252                         SHOW_SBITS(re, gb, 1);
 253                 LAST_SKIP_BITS(re, gb, 1);
 254             } else {
 255                 /* Escape. */
 256 #if MIN_CACHE_BITS < 6 + 6 + 12
 257 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
 258 #endif
 259                 run = SHOW_UBITS(re, gb, 6) + 1;
 260                 SKIP_BITS(re, gb, 6);
 261                 level = SHOW_UBITS(re, gb, 12) - 2048;
 262                 LAST_SKIP_BITS(re, gb, 12);
 263
 264                 i += run;
 265                 if (i > MAX_INDEX)
 266                     return AVERROR_INVALIDDATA;
 267             }
 268
 269             block[scantable[i]] = (level * quant_matrix[i]) >> 4;
 270         }
 271         CLOSE_READER(re, gb);
 272     }
 273
 274     s->idsp.idct_put(dest, linesize, block);
 275
 276     return 0;
 277 }
 278
 279 static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
 280 {
 281     int ret, slice_number, slice_offsets[5];
 282     int linesize_y  = frame->linesize[0] * line_stride;
 283     int linesize_cb = frame->linesize[1] * line_stride;
 284     int linesize_cr = frame->linesize[2] * line_stride;
 285     int linesize_a;
 286
 287     if (s->alpha_type != SHQ_NO_ALPHA)
 288         linesize_a = frame->linesize[3] * line_stride;
 289
 290     if (end < start || end - start < 3 || end > buf_size)
 291         return AVERROR_INVALIDDATA;
 292
 293     slice_offsets[0] = start;
 294     slice_offsets[4] = end;
 295     for (slice_number = 1; slice_number < 4; slice_number++) {
 296         uint32_t last_offset, slice_len;
 297
 298         last_offset = slice_offsets[slice_number - 1];
 299         slice_len = AV_RL24(buf + last_offset);
 300         slice_offsets[slice_number] = last_offset + slice_len;
 301
 302         if (slice_len < 3 || slice_offsets[slice_number] > end - 3)
 303             return AVERROR_INVALIDDATA;
 304     }
 305
 306     for (slice_number = 0; slice_number < 4; slice_number++) {
 307         GetBitContext gb;
 308         uint32_t slice_begin, slice_end;
 309         int x, y;
 310
 311         slice_begin = slice_offsets[slice_number];
 312         slice_end = slice_offsets[slice_number + 1];
 313
 314         if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
 315             return ret;
 316
 317         for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
 318             uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
 319             int last_dc[4] = { 1024, 1024, 1024, 1024 };
 320             uint8_t last_alpha[16];
 321
 322             memset(last_alpha, 255, sizeof(last_alpha));
 323
 324             dest_y = frame->data[0] + frame->linesize[0] * (y + field_number);
 325             if (s->subsampling == SHQ_SUBSAMPLING_420) {
 326                 dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number);
 327                 dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number);
 328             } else {
 329                 dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number);
 330                 dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number);
 331             }
 332             if (s->alpha_type != SHQ_NO_ALPHA) {
 333                 dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
 334             }
 335
 336             for (x = 0; x < frame->width; x += 16) {
 337                 /* Decode the four luma blocks. */
 338                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
 339                     return ret;
 340                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
 341                     return ret;
 342                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
 343                     return ret;
 344                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
 345                     return ret;
 346
 347                 /*
 348                  * Decode the first chroma block. For 4:2:0, this is the only one;
 349                  * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
 350                  */
 351                 if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
 352                     return ret;
 353                 if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
 354                     return ret;
 355
 356                 if (s->subsampling != SHQ_SUBSAMPLING_420) {
 357                     /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
 358                     if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
 359                         return ret;
 360                     if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
 361                         return ret;
 362
 363                     if (s->subsampling == SHQ_SUBSAMPLING_444) {
 364                         /* Top-right and bottom-right blocks. */
 365                         if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0)
 366                             return ret;
 367                         if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0)
 368                             return ret;
 369                         if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0)
 370                             return ret;
 371                         if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
 372                             return ret;
 373
 374                         dest_cb += 8;
 375                         dest_cr += 8;
 376                     }
 377                 }
 378                 dest_y += 16;
 379                 dest_cb += 8;
 380                 dest_cr += 8;
 381
 382                 if (s->alpha_type == SHQ_RLE_ALPHA) {
 383                     /* Alpha coded using 16x8 RLE blocks. */
 384                     if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
 385                         return ret;
 386                     if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
 387                         return ret;
 388                     dest_a += 16;
 389                 } else if (s->alpha_type == SHQ_DCT_ALPHA) {
 390                     /* Alpha encoded exactly like luma. */
 391                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0)
 392                         return ret;
 393                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
 394                         return ret;
 395                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
 396                         return ret;
 397                     if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
 398                         return ret;
 399                     dest_a += 16;
 400                 }
 401             }
 402         }
 403     }
 404
 405     return 0;
 406 }
 407
 408 static void compute_quant_matrix(int *output, int qscale)
 409 {
 410     int i;
 411     for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
 412 }
 413
 414 static int speedhq_decode_frame(AVCodecContext *avctx,
 415                                 void *data, int *got_frame,
 416                                 AVPacket *avpkt)
 417 {
 418     SHQContext * const s = avctx->priv_data;
 419     const uint8_t *buf   = avpkt->data;
 420     int buf_size         = avpkt->size;
 421     AVFrame *frame       = data;
 422     uint8_t quality;
 423     uint32_t second_field_offset;
 424     int ret;
 425
 426     if (buf_size < 4)
 427         return AVERROR_INVALIDDATA;
 428
 429     quality = buf[0];
 430     if (quality >= 100) {
 431         return AVERROR_INVALIDDATA;
 432     }
 433
 434     compute_quant_matrix(s->quant_matrix, 100 - quality);
 435
 436     second_field_offset = AV_RL24(buf + 1);
 437     if (second_field_offset >= buf_size - 3) {
 438         return AVERROR_INVALIDDATA;
 439     }
 440
 441     avctx->coded_width = FFALIGN(avctx->width, 16);
 442     avctx->coded_height = FFALIGN(avctx->height, 16);
 443
 444     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
 445         return ret;
 446     }
 447     frame->key_frame = 1;
 448
 449     if (second_field_offset == 4 || second_field_offset == (buf_size-4)) {
 450         /*
 451          * Overlapping first and second fields is used to signal
 452          * encoding only a single field. In this case, "height"
 453          * is ambiguous; it could mean either the height of the
 454          * frame as a whole, or of the field. The former would make
 455          * more sense for compatibility with legacy decoders,
 456          * but this matches the convention used in NDI, which is
 457          * the primary user of this trick.
 458          */
 459         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0)
 460             return ret;
 461     } else {
 462         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0)
 463             return ret;
 464         if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0)
 465             return ret;
 466     }
 467
 468     *got_frame = 1;
 469     return buf_size;
 470 }
 471
 472 /*
 473  * Alpha VLC. Run and level are independently coded, and would be
 474  * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
 475  * bother with combining them into one table.
 476  */
 477 static av_cold void compute_alpha_vlcs(void)
 478 {
 479     uint16_t run_code[134], level_code[266];
 480     uint8_t run_bits[134], level_bits[266];
 481     int16_t run_symbols[134], level_symbols[266];
 482     int entry, i, sign;
 483
 484     /* Initialize VLC for alpha run. */
 485     entry = 0;
 486
 487     /* 0 -> 0. */
 488     run_code[entry] = 0;
 489     run_bits[entry] = 1;
 490     run_symbols[entry] = 0;
 491     ++entry;
 492
 493     /* 10xx -> xx plus 1. */
 494     for (i = 0; i < 4; ++i) {
 495         run_code[entry] = (i << 2) | 1;
 496         run_bits[entry] = 4;
 497         run_symbols[entry] = i + 1;
 498         ++entry;
 499     }
 500
 501     /* 111xxxxxxx -> xxxxxxx. */
 502     for (i = 0; i < 128; ++i) {
 503         run_code[entry] = (i << 3) | 7;
 504         run_bits[entry] = 10;
 505         run_symbols[entry] = i;
 506         ++entry;
 507     }
 508
 509     /* 110 -> EOB. */
 510     run_code[entry] = 3;
 511     run_bits[entry] = 3;
 512     run_symbols[entry] = -1;
 513     ++entry;
 514
 515     av_assert0(entry == FF_ARRAY_ELEMS(run_code));
 516
 517     INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
 518                               FF_ARRAY_ELEMS(run_code),
 519                               run_bits, 1, 1,
 520                               run_code, 2, 2,
 521                               run_symbols, 2, 2, 160);
 522
 523     /* Initialize VLC for alpha level. */
 524     entry = 0;
 525
 526     for (sign = 0; sign <= 1; ++sign) {
 527         /* 1s -> -1 or +1 (depending on sign bit). */
 528         level_code[entry] = (sign << 1) | 1;
 529         level_bits[entry] = 2;
 530         level_symbols[entry] = sign ? -1 : 1;
 531         ++entry;
 532
 533         /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
 534         for (i = 0; i < 4; ++i) {
 535             level_code[entry] = (i << 3) | (sign << 2) | 2;
 536             level_bits[entry] = 5;
 537             level_symbols[entry] = sign ? -(i + 2) : (i + 2);
 538             ++entry;
 539         }
 540     }
 541
 542     /*
 543      * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
 544      * here that would better be encoded in other ways (e.g. 0 would be
 545      * encoded by increasing run, and +/- 1 would be encoded with a
 546      * shorter code), but it doesn't hurt to allow everything.
 547      */
 548     for (i = 0; i < 256; ++i) {
 549         level_code[entry] = i << 2;
 550         level_bits[entry] = 10;
 551         level_symbols[entry] = i;
 552         ++entry;
 553     }
 554
 555     av_assert0(entry == FF_ARRAY_ELEMS(level_code));
 556
 557     INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
 558                               FF_ARRAY_ELEMS(level_code),
 559                               level_bits, 1, 1,
 560                               level_code, 2, 2,
 561                               level_symbols, 2, 2, 288);
 562 }
 563
 /**
  * Build every table shared between decoder instances: the two DC VLCs,
  * the AC run/level VLC and the two RLE alpha VLCs.
  *
  * speedhq_decode_init() runs this through ff_thread_once().
  */
 static av_cold void speedhq_static_init(void)
 {
     /* Exactly the same as MPEG-2, except for a little-endian reader. */
     INIT_CUSTOM_VLC_STATIC(&dc_lum_vlc_le, DC_VLC_BITS, 12,
                            ff_mpeg12_vlc_dc_lum_bits, 1, 1,
                            ff_mpeg12_vlc_dc_lum_code, 2, 2,
                            INIT_VLC_OUTPUT_LE, 512);
     INIT_CUSTOM_VLC_STATIC(&dc_chroma_vlc_le, DC_VLC_BITS, 12,
                            ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
                            ff_mpeg12_vlc_dc_chroma_code, 2, 2,
                            INIT_VLC_OUTPUT_LE, 514);

     /* AC run/level table; decode_dct_block() reads it via rl_vlc[0]. */
     ff_rl_init(&ff_rl_speedhq, speedhq_static_rl_table_store);
     INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);

     compute_alpha_vlcs();
 }
 581
 582 static av_cold int speedhq_decode_init(AVCodecContext *avctx)
 583 {
 584     int ret;
 585     static AVOnce init_once = AV_ONCE_INIT;
 586     SHQContext * const s = avctx->priv_data;
 587
 588     s->avctx = avctx;
 589
 590     ret = ff_thread_once(&init_once, speedhq_static_init);
 591     if (ret)
 592         return AVERROR_UNKNOWN;
 593
 594     ff_blockdsp_init(&s->bdsp, avctx);
 595     ff_idctdsp_init(&s->idsp, avctx);
 596     ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
 597
 598     switch (avctx->codec_tag) {
 599     case MKTAG('S', 'H', 'Q', '0'):
 600         s->subsampling = SHQ_SUBSAMPLING_420;
 601         s->alpha_type = SHQ_NO_ALPHA;
 602         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 603         break;
 604     case MKTAG('S', 'H', 'Q', '1'):
 605         s->subsampling = SHQ_SUBSAMPLING_420;
 606         s->alpha_type = SHQ_RLE_ALPHA;
 607         avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
 608         break;
 609     case MKTAG('S', 'H', 'Q', '2'):
 610         s->subsampling = SHQ_SUBSAMPLING_422;
 611         s->alpha_type = SHQ_NO_ALPHA;
 612         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
 613         break;
 614     case MKTAG('S', 'H', 'Q', '3'):
 615         s->subsampling = SHQ_SUBSAMPLING_422;
 616         s->alpha_type = SHQ_RLE_ALPHA;
 617         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
 618         break;
 619     case MKTAG('S', 'H', 'Q', '4'):
 620         s->subsampling = SHQ_SUBSAMPLING_444;
 621         s->alpha_type = SHQ_NO_ALPHA;
 622         avctx->pix_fmt = AV_PIX_FMT_YUV444P;
 623         break;
 624     case MKTAG('S', 'H', 'Q', '5'):
 625         s->subsampling = SHQ_SUBSAMPLING_444;
 626         s->alpha_type = SHQ_RLE_ALPHA;
 627         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
 628         break;
 629     case MKTAG('S', 'H', 'Q', '7'):
 630         s->subsampling = SHQ_SUBSAMPLING_422;
 631         s->alpha_type = SHQ_DCT_ALPHA;
 632         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
 633         break;
 634     case MKTAG('S', 'H', 'Q', '9'):
 635         s->subsampling = SHQ_SUBSAMPLING_444;
 636         s->alpha_type = SHQ_DCT_ALPHA;
 637         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
 638         break;
 639     default:
 640         av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
 641                avctx->codec_tag);
 642         return AVERROR_INVALIDDATA;
 643     }
 644
 645     /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
 646     avctx->colorspace = AVCOL_SPC_BT470BG;
 647     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
 648
 649     return 0;
 650 }
 651
 /*
  * Codec registration entry for the SpeedHQ video decoder. No .close
  * callback is set; the private context holds no pointers to memory
  * allocated by this file.
  */
 AVCodec ff_speedhq_decoder = {
     .name           = "speedhq",
     .long_name      = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_SPEEDHQ,
     .priv_data_size = sizeof(SHQContext),
     .init           = speedhq_decode_init,
     .decode         = speedhq_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
 };
 662 #endif /* CONFIG_SPEEDHQ_DECODER */