git.sesse.net Git - ffmpeg/blob - libavcodec/hcadec.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #include "libavutil/crc.h"
  20 #include "libavutil/float_dsp.h"
  21 #include "libavutil/intreadwrite.h"
  22 #include "libavutil/mem_internal.h"
  23 #include "libavutil/tx.h"
  24
  25 #include "avcodec.h"
  26 #include "get_bits.h"
  27 #include "internal.h"
  28 #include "hca_data.h"
  29
  30 typedef struct ChannelContext {
  31     float    base[128];
  32     DECLARE_ALIGNED(32, float, imdct_in)[128];
  33     DECLARE_ALIGNED(32, float, imdct_out)[128];
  34     DECLARE_ALIGNED(32, float, imdct_prev)[128];
  35     int8_t   scale_factors[128];
  36     uint8_t  scale[128];
  37     int8_t   intensity[8];
  38     int8_t  *hfr_scale;
  39     unsigned count;
  40     int      chan_type;
  41 } ChannelContext;
  42
  43 typedef struct HCAContext {
  44     GetBitContext gb;
  45
  46     const AVCRC *crc_table;
  47
  48     ChannelContext ch[16];
  49
  50     uint8_t ath[128];
  51
  52     int     ath_type;
  53     unsigned hfr_group_count;
  54     uint8_t track_count;
  55     uint8_t channel_config;
  56     uint8_t total_band_count;
  57     uint8_t base_band_count;
  58     uint8_t stereo_band_count;
  59     uint8_t bands_per_hfr_group;
  60
  61     av_tx_fn           tx_fn;
  62     AVTXContext       *tx_ctx;
  63     AVFloatDSPContext *fdsp;
  64 } HCAContext;
  65
  66 static void ath_init1(uint8_t *ath, int sample_rate)
  67 {
  68     unsigned int index;
  69     unsigned int acc = 0;
  70
  71     for (int i = 0; i < 128; i++) {
  72         acc += sample_rate;
  73         index = acc >> 13;
  74
  75         if (index >= 654) {
  76             memset(ath+i, 0xFF, (128 - i));
  77             break;
  78         }
  79
  80         ath[i] = ath_base_curve[index];
  81     }
  82 }
  83
  84 static int ath_init(uint8_t *ath, int type, int sample_rate)
  85 {
  86     switch (type) {
  87     case 0:
  88         /* nothing to do */
  89         break;
  90     case 1:
  91         ath_init1(ath, sample_rate);
  92         break;
  93     default:
  94         return AVERROR_INVALIDDATA;
  95     }
  96
  97     return 0;
  98 }
  99
 100 static inline unsigned ceil2(unsigned a, unsigned b)
 101 {
 102     return (b > 0) ? (a / b + ((a % b) ? 1 : 0)) : 0;
 103 }
 104
 105 static av_cold int decode_init(AVCodecContext *avctx)
 106 {
 107     HCAContext *c = avctx->priv_data;
 108     GetBitContext *gb = &c->gb;
 109     int8_t r[16] = { 0 };
 110     float scale = 1.f / 8.f;
 111     unsigned b, chunk;
 112     int version, ret;
 113
 114     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 115     c->crc_table = av_crc_get_table(AV_CRC_16_ANSI);
 116
 117     if (avctx->channels <= 0 || avctx->channels > 16)
 118         return AVERROR(EINVAL);
 119
 120     ret = init_get_bits8(gb, avctx->extradata, avctx->extradata_size);
 121     if (ret < 0)
 122         return ret;
 123     skip_bits_long(gb, 32);
 124     version = get_bits(gb, 16);
 125     skip_bits_long(gb, 16);
 126
 127     c->ath_type = version >= 0x200 ? 0 : 1;
 128
 129     if (get_bits_long(gb, 32) != MKBETAG('f', 'm', 't', 0))
 130         return AVERROR_INVALIDDATA;
 131     skip_bits_long(gb, 32);
 132     skip_bits_long(gb, 32);
 133     skip_bits_long(gb, 32);
 134
 135     chunk = get_bits_long(gb, 32);
 136     if (chunk == MKBETAG('c', 'o', 'm', 'p')) {
 137         skip_bits_long(gb, 16);
 138         skip_bits_long(gb, 8);
 139         skip_bits_long(gb, 8);
 140         c->track_count = get_bits(gb, 8);
 141         c->channel_config = get_bits(gb, 8);
 142         c->total_band_count = get_bits(gb, 8);
 143         c->base_band_count = get_bits(gb, 8);
 144         c->stereo_band_count = get_bits(gb, 8);
 145         c->bands_per_hfr_group = get_bits(gb, 8);
 146     } else if (chunk == MKBETAG('d', 'e', 'c', 0)) {
 147         skip_bits_long(gb, 16);
 148         skip_bits_long(gb, 8);
 149         skip_bits_long(gb, 8);
 150         c->total_band_count = get_bits(gb, 8) + 1;
 151         c->base_band_count = get_bits(gb, 8) + 1;
 152         c->track_count = get_bits(gb, 4);
 153         c->channel_config = get_bits(gb, 4);
 154         if (!get_bits(gb, 8))
 155             c->base_band_count = c->total_band_count;
 156         c->stereo_band_count = c->total_band_count - c->base_band_count;
 157         c->bands_per_hfr_group = 0;
 158     } else
 159         return AVERROR_INVALIDDATA;
 160
 161     if (c->total_band_count > FF_ARRAY_ELEMS(c->ch->imdct_in))
 162         return AVERROR_INVALIDDATA;
 163
 164
 165     while (get_bits_left(gb) >= 32) {
 166         chunk = get_bits_long(gb, 32);
 167         if (chunk == MKBETAG('v', 'b', 'r', 0)) {
 168             skip_bits_long(gb, 16);
 169             skip_bits_long(gb, 16);
 170         } else if (chunk == MKBETAG('a', 't', 'h', 0)) {
 171             c->ath_type = get_bits(gb, 16);
 172         } else if (chunk == MKBETAG('r', 'v', 'a', 0)) {
 173             skip_bits_long(gb, 32);
 174         } else if (chunk == MKBETAG('c', 'o', 'm', 'm')) {
 175             skip_bits_long(gb, get_bits(gb, 8) * 8);
 176         } else if (chunk == MKBETAG('c', 'i', 'p', 'h')) {
 177             skip_bits_long(gb, 16);
 178         } else if (chunk == MKBETAG('l', 'o', 'o', 'p')) {
 179             skip_bits_long(gb, 32);
 180             skip_bits_long(gb, 32);
 181             skip_bits_long(gb, 16);
 182             skip_bits_long(gb, 16);
 183         } else if (chunk == MKBETAG('p', 'a', 'd', 0)) {
 184             break;
 185         } else {
 186             break;
 187         }
 188     }
 189
 190     ret = ath_init(c->ath, c->ath_type, avctx->sample_rate);
 191     if (ret < 0)
 192         return ret;
 193
 194     if (!c->track_count)
 195         c->track_count = 1;
 196
 197     b = avctx->channels / c->track_count;
 198     if (c->stereo_band_count && b > 1) {
 199         int8_t *x = r;
 200
 201         for (int i = 0; i < c->track_count; i++, x+=b) {
 202             switch (b) {
 203             case 2:
 204             case 3:
 205                 x[0] = 1;
 206                 x[1] = 2;
 207                 break;
 208             case 4:
 209                 x[0]=1; x[1] = 2;
 210                 if (c->channel_config == 0) {
 211                     x[2]=1;
 212                     x[3]=2;
 213                 }
 214                 break;
 215             case 5:
 216                 x[0]=1; x[1] = 2;
 217                 if (c->channel_config <= 2) {
 218                     x[3]=1;
 219                     x[4]=2;
 220                 }
 221                 break;
 222             case 6:
 223             case 7:
 224                 x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2;
 225                 break;
 226             case 8:
 227                 x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2; x[6] = 1; x[7] = 2;
 228                 break;
 229             }
 230         }
 231     }
 232
 233     if (c->total_band_count < c->base_band_count)
 234         return AVERROR_INVALIDDATA;
 235
 236     c->hfr_group_count = ceil2(c->total_band_count - (c->base_band_count + c->stereo_band_count),
 237                                c->bands_per_hfr_group);
 238
 239     if (c->base_band_count + c->stereo_band_count + (unsigned long)c->hfr_group_count > 128ULL)
 240         return AVERROR_INVALIDDATA;
 241
 242     for (int i = 0; i < avctx->channels; i++) {
 243         c->ch[i].chan_type = r[i];
 244         c->ch[i].count     = c->base_band_count + ((r[i] != 2) ? c->stereo_band_count : 0);
 245         c->ch[i].hfr_scale = &c->ch[i].scale_factors[c->base_band_count + c->stereo_band_count];
 246         if (c->ch[i].count > 128)
 247             return AVERROR_INVALIDDATA;
 248     }
 249
 250     c->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 251     if (!c->fdsp)
 252         return AVERROR(ENOMEM);
 253
 254     return av_tx_init(&c->tx_ctx, &c->tx_fn, AV_TX_FLOAT_MDCT, 1, 128, &scale, 0);
 255 }
 256
 257 static void run_imdct(HCAContext *c, ChannelContext *ch, int index, float *out)
 258 {
 259     c->tx_fn(c->tx_ctx, ch->imdct_out, ch->imdct_in, sizeof(float));
 260
 261     c->fdsp->vector_fmul_window(out, ch->imdct_prev + (128 >> 1),
 262                                 ch->imdct_out, window, 128 >> 1);
 263
 264     memcpy(ch->imdct_prev, ch->imdct_out, 128 * sizeof(float));
 265 }
 266
 267 static void apply_intensity_stereo(HCAContext *s, ChannelContext *ch1, ChannelContext *ch2,
 268                                    int index, unsigned band_count, unsigned base_band_count,
 269                                    unsigned stereo_band_count)
 270 {
 271     float ratio_l = intensity_ratio_table[ch2->intensity[index]];
 272     float ratio_r = ratio_l - 2.0f;
 273     float *c1 = &ch1->imdct_in[base_band_count];
 274     float *c2 = &ch2->imdct_in[base_band_count];
 275
 276     if (ch1->chan_type != 1 || !stereo_band_count)
 277         return;
 278
 279     for (int i = 0; i < band_count; i++) {
 280         *(c2++)  = *c1 * ratio_r;
 281         *(c1++) *= ratio_l;
 282     }
 283 }
 284
 285 static void reconstruct_hfr(HCAContext *s, ChannelContext *ch,
 286                             unsigned hfr_group_count,
 287                             unsigned bands_per_hfr_group,
 288                             unsigned start_band, unsigned total_band_count)
 289 {
 290     if (ch->chan_type == 2 || !bands_per_hfr_group)
 291         return;
 292
 293     for (int i = 0, k = start_band, l = start_band - 1; i < hfr_group_count; i++){
 294         for (int j = 0; j < bands_per_hfr_group && k < total_band_count && l >= 0; j++, k++, l--){
 295             ch->imdct_in[k] = scale_conversion_table[ scale_conv_bias +
 296                 av_clip_intp2(ch->hfr_scale[i] - ch->scale_factors[l], 6) ] * ch->imdct_in[l];
 297         }
 298     }
 299
 300     ch->imdct_in[127] = 0;
 301 }
 302
 303 static void dequantize_coefficients(HCAContext *c, ChannelContext *ch)
 304 {
 305     GetBitContext *gb = &c->gb;
 306
 307     for (int i = 0; i < ch->count; i++) {
 308         unsigned scale = ch->scale[i];
 309         int nb_bits = max_bits_table[scale];
 310         int value = get_bitsz(gb, nb_bits);
 311         float factor;
 312
 313         if (scale > 7) {
 314             value = (1 - ((value & 1) << 1)) * (value >> 1);
 315             if (!value)
 316                 skip_bits_long(gb, -1);
 317             factor = value;
 318         } else {
 319             value += scale << 4;
 320             skip_bits_long(gb, quant_spectrum_bits[value] - nb_bits);
 321             factor = quant_spectrum_value[value];
 322         }
 323         ch->imdct_in[i] = factor * ch->base[i];
 324     }
 325
 326     memset(ch->imdct_in + ch->count, 0,  sizeof(ch->imdct_in) - ch->count * sizeof(ch->imdct_in[0]));
 327 }
 328
 329 static void unpack(HCAContext *c, ChannelContext *ch,
 330                    unsigned hfr_group_count,
 331                    int packed_noise_level,
 332                    const uint8_t *ath)
 333 {
 334     GetBitContext *gb = &c->gb;
 335     int delta_bits = get_bits(gb, 3);
 336
 337     if (delta_bits > 5) {
 338         for (int i = 0; i < ch->count; i++)
 339             ch->scale_factors[i] = get_bits(gb, 6);
 340     } else if (delta_bits) {
 341         int factor = get_bits(gb, 6);
 342         int max_value = (1 << delta_bits) - 1;
 343         int half_max = max_value >> 1;
 344
 345         ch->scale_factors[0] = factor;
 346         for (int i = 1; i < ch->count; i++){
 347             int delta = get_bits(gb, delta_bits);
 348
 349             if (delta == max_value) {
 350                 factor = get_bits(gb, 6);
 351             } else {
 352                 factor += delta - half_max;
 353             }
 354             factor = av_clip_uintp2(factor, 6);
 355
 356             ch->scale_factors[i] = factor;
 357         }
 358     } else {
 359         memset(ch->scale_factors, 0, 128);
 360     }
 361
 362     if (ch->chan_type == 2){
 363         ch->intensity[0] = get_bits(gb, 4);
 364         if (ch->intensity[0] < 15) {
 365             for (int i = 1; i < 8; i++)
 366                 ch->intensity[i] = get_bits(gb, 4);
 367         }
 368     } else {
 369         for (int i = 0; i < hfr_group_count; i++)
 370             ch->hfr_scale[i] = get_bits(gb, 6);
 371     }
 372
 373     for (int i = 0; i < ch->count; i++) {
 374         int scale = ch->scale_factors[i];
 375
 376         if (scale) {
 377             scale = c->ath[i] + ((packed_noise_level + i) >> 8) - ((scale * 5) >> 1) + 2;
 378             scale = scale_table[av_clip(scale, 0, 58)];
 379         }
 380         ch->scale[i] = scale;
 381     }
 382
 383     memset(ch->scale + ch->count, 0, sizeof(ch->scale) - ch->count);
 384
 385     for (int i = 0; i < ch->count; i++)
 386         ch->base[i] = dequantizer_scaling_table[ch->scale_factors[i]] * quant_step_size[ch->scale[i]];
 387 }
 388
 389 static int decode_frame(AVCodecContext *avctx, void *data,
 390                         int *got_frame_ptr, AVPacket *avpkt)
 391 {
 392     AVFrame *frame = data;
 393     HCAContext *c = avctx->priv_data;
 394     int ch, ret, packed_noise_level;
 395     GetBitContext *gb = &c->gb;
 396     float **samples;
 397
 398     if (avctx->err_recognition & AV_EF_CRCCHECK) {
 399         if (av_crc(c->crc_table, 0, avpkt->data, avpkt->size))
 400             return AVERROR_INVALIDDATA;
 401     }
 402
 403     if ((ret = init_get_bits8(gb, avpkt->data, avpkt->size)) < 0)
 404         return ret;
 405
 406     if (get_bits(gb, 16) != 0xFFFF)
 407         return AVERROR_INVALIDDATA;
 408
 409     frame->nb_samples = 1024;
 410     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
 411         return ret;
 412     samples = (float **)frame->extended_data;
 413
 414     packed_noise_level = (get_bits(gb, 9) << 8) - get_bits(gb, 7);
 415
 416     for (ch = 0; ch < avctx->channels; ch++)
 417         unpack(c, &c->ch[ch], c->hfr_group_count, packed_noise_level, c->ath);
 418
 419     for (int i = 0; i < 8; i++) {
 420         for (ch = 0; ch < avctx->channels; ch++)
 421             dequantize_coefficients(c, &c->ch[ch]);
 422         for (ch = 0; ch < avctx->channels; ch++)
 423             reconstruct_hfr(c, &c->ch[ch], c->hfr_group_count, c->bands_per_hfr_group,
 424                             c->stereo_band_count + c->base_band_count, c->total_band_count);
 425         for (ch = 0; ch < avctx->channels - 1; ch++)
 426             apply_intensity_stereo(c, &c->ch[ch], &c->ch[ch+1], i,
 427                                    c->total_band_count - c->base_band_count,
 428                                    c->base_band_count, c->stereo_band_count);
 429         for (ch = 0; ch < avctx->channels; ch++)
 430             run_imdct(c, &c->ch[ch], i, samples[ch] + i * 128);
 431     }
 432
 433     *got_frame_ptr = 1;
 434
 435     return avpkt->size;
 436 }
 437
 438 static av_cold int decode_close(AVCodecContext *avctx)
 439 {
 440     HCAContext *c = avctx->priv_data;
 441
 442     av_freep(&c->fdsp);
 443     av_tx_uninit(&c->tx_ctx);
 444
 445     return 0;
 446 }
 447
 448 const AVCodec ff_hca_decoder = {
 449     .name           = "hca",
 450     .long_name      = NULL_IF_CONFIG_SMALL("CRI HCA"),
 451     .type           = AVMEDIA_TYPE_AUDIO,
 452     .id             = AV_CODEC_ID_HCA,
 453     .priv_data_size = sizeof(HCAContext),
 454     .init           = decode_init,
 455     .decode         = decode_frame,
 456     .close          = decode_close,
 457     .capabilities   = AV_CODEC_CAP_DR1,
 458     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
 459                                                       AV_SAMPLE_FMT_NONE },
 460 };