git.sesse.net Git - ffmpeg/blob - libavcodec/alac.c

   1 /*
   2  * ALAC (Apple Lossless Audio Codec) decoder
   3  * Copyright (c) 2005 David Hammerton
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
/**
 * @file
 * ALAC (Apple Lossless Audio Codec) decoder
 * @author 2005 David Hammerton
 * @see http://crazney.net/programs/itunes/alac.html
 *
 * Note: This decoder expects a 36-byte QuickTime atom to be
 * passed through the extradata[_size] fields. This atom is appended to
 * the end of an 'alac' stsd atom and has the following format:
 *
 * 32bit  atom size
 * 32bit  tag                  ("alac")
 * 32bit  tag version          (0)
 * 32bit  samples per frame    (used when not set explicitly in the frames)
 *  8bit  compatible version   (0)
 *  8bit  sample size
 *  8bit  history mult         (40)
 *  8bit  initial history      (14)
 *  8bit  rice param limit     (10)
 *  8bit  channels
 * 16bit  maxRun               (255)
 * 32bit  max coded frame size (0 means unknown)
 * 32bit  average bitrate      (0 means unknown)
 * 32bit  samplerate
 */
  47
  48 #include "libavutil/channel_layout.h"
  49 #include "avcodec.h"
  50 #include "get_bits.h"
  51 #include "bytestream.h"
  52 #include "internal.h"
  53 #include "unary.h"
  54 #include "mathops.h"
  55 #include "alac_data.h"
  56
  57 #define ALAC_EXTRADATA_SIZE 36
  58
  59 typedef struct {
  60     AVCodecContext *avctx;
  61     AVFrame frame;
  62     GetBitContext gb;
  63     int channels;
  64
  65     int32_t *predict_error_buffer[2];
  66     int32_t *output_samples_buffer[2];
  67     int32_t *extra_bits_buffer[2];
  68
  69     uint32_t max_samples_per_frame;
  70     uint8_t  sample_size;
  71     uint8_t  rice_history_mult;
  72     uint8_t  rice_initial_history;
  73     uint8_t  rice_limit;
  74
  75     int extra_bits;     /**< number of extra bits beyond 16-bit */
  76     int nb_samples;     /**< number of samples in the current frame */
  77 } ALACContext;
  78
  79 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
  80 {
  81     unsigned int x = get_unary_0_9(gb);
  82
  83     if (x > 8) { /* RICE THRESHOLD */
  84         /* use alternative encoding */
  85         x = get_bits_long(gb, bps);
  86     } else if (k != 1) {
  87         int extrabits = show_bits(gb, k);
  88
  89         /* multiply x by 2^k - 1, as part of their strange algorithm */
  90         x = (x << k) - x;
  91
  92         if (extrabits > 1) {
  93             x += extrabits - 1;
  94             skip_bits(gb, k);
  95         } else
  96             skip_bits(gb, k - 1);
  97     }
  98     return x;
  99 }
 100
 101 static void rice_decompress(ALACContext *alac, int32_t *output_buffer,
 102                             int nb_samples, int bps, int rice_history_mult)
 103 {
 104     int i;
 105     unsigned int history = alac->rice_initial_history;
 106     int sign_modifier = 0;
 107
 108     for (i = 0; i < nb_samples; i++) {
 109         int k;
 110         unsigned int x;
 111
 112         /* calculate rice param and decode next value */
 113         k = av_log2((history >> 9) + 3);
 114         k = FFMIN(k, alac->rice_limit);
 115         x = decode_scalar(&alac->gb, k, bps);
 116         x += sign_modifier;
 117         sign_modifier = 0;
 118         output_buffer[i] = (x >> 1) ^ -(x & 1);
 119
 120         /* update the history */
 121         if (x > 0xffff)
 122             history = 0xffff;
 123         else
 124             history +=         x * rice_history_mult -
 125                        ((history * rice_history_mult) >> 9);
 126
 127         /* special case: there may be compressed blocks of 0 */
 128         if ((history < 128) && (i + 1 < nb_samples)) {
 129             int block_size;
 130
 131             /* calculate rice param and decode block size */
 132             k = 7 - av_log2(history) + ((history + 16) >> 6);
 133             k = FFMIN(k, alac->rice_limit);
 134             block_size = decode_scalar(&alac->gb, k, 16);
 135
 136             if (block_size > 0) {
 137                 if (block_size >= nb_samples - i) {
 138                     av_log(alac->avctx, AV_LOG_ERROR,
 139                            "invalid zero block size of %d %d %d\n", block_size,
 140                            nb_samples, i);
 141                     block_size = nb_samples - i - 1;
 142                 }
 143                 memset(&output_buffer[i + 1], 0,
 144                        block_size * sizeof(*output_buffer));
 145                 i += block_size;
 146             }
 147             if (block_size <= 0xffff)
 148                 sign_modifier = 1;
 149             history = 0;
 150         }
 151     }
 152 }
 153
 154 static inline int sign_only(int v)
 155 {
 156     return v ? FFSIGN(v) : 0;
 157 }
 158
 159 static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
 160                            int nb_samples, int bps, int16_t *lpc_coefs,
 161                            int lpc_order, int lpc_quant)
 162 {
 163     int i;
 164     int32_t *pred = buffer_out;
 165
 166     /* first sample always copies */
 167     *buffer_out = *error_buffer;
 168
 169     if (nb_samples <= 1)
 170         return;
 171
 172     if (!lpc_order) {
 173         memcpy(&buffer_out[1], &error_buffer[1],
 174                (nb_samples - 1) * sizeof(*buffer_out));
 175         return;
 176     }
 177
 178     if (lpc_order == 31) {
 179         /* simple 1st-order prediction */
 180         for (i = 1; i < nb_samples; i++) {
 181             buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
 182                                         bps);
 183         }
 184         return;
 185     }
 186
 187     /* read warm-up samples */
 188     for (i = 1; i <= lpc_order; i++)
 189         buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);
 190
 191     /* NOTE: 4 and 8 are very common cases that could be optimized. */
 192
 193     for (; i < nb_samples; i++) {
 194         int j;
 195         int val = 0;
 196         int error_val = error_buffer[i];
 197         int error_sign;
 198         int d = *pred++;
 199
 200         /* LPC prediction */
 201         for (j = 0; j < lpc_order; j++)
 202             val += (pred[j] - d) * lpc_coefs[j];
 203         val = (val + (1 << (lpc_quant - 1))) >> lpc_quant;
 204         val += d + error_val;
 205         buffer_out[i] = sign_extend(val, bps);
 206
 207         /* adapt LPC coefficients */
 208         error_sign = sign_only(error_val);
 209         if (error_sign) {
 210             for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) {
 211                 int sign;
 212                 val  = d - pred[j];
 213                 sign = sign_only(val) * error_sign;
 214                 lpc_coefs[j] -= sign;
 215                 val *= sign;
 216                 error_val -= (val >> lpc_quant) * (j + 1);
 217             }
 218         }
 219     }
 220 }
 221
 222 static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
 223                                int decorr_shift, int decorr_left_weight)
 224 {
 225     int i;
 226
 227     for (i = 0; i < nb_samples; i++) {
 228         int32_t a, b;
 229
 230         a = buffer[0][i];
 231         b = buffer[1][i];
 232
 233         a -= (b * decorr_left_weight) >> decorr_shift;
 234         b += a;
 235
 236         buffer[0][i] = b;
 237         buffer[1][i] = a;
 238     }
 239 }
 240
 241 static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
 242                               int extra_bits, int channels, int nb_samples)
 243 {
 244     int i, ch;
 245
 246     for (ch = 0; ch < channels; ch++)
 247         for (i = 0; i < nb_samples; i++)
 248             buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
 249 }
 250
 251 static int decode_element(AVCodecContext *avctx, void *data, int ch_index,
 252                           int channels)
 253 {
 254     ALACContext *alac = avctx->priv_data;
 255     int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret;
 256     uint32_t output_samples;
 257     int i, ch;
 258
 259     skip_bits(&alac->gb, 4);  /* element instance tag */
 260     skip_bits(&alac->gb, 12); /* unused header bits */
 261
 262     /* the number of output samples is stored in the frame */
 263     has_size = get_bits1(&alac->gb);
 264
 265     alac->extra_bits = get_bits(&alac->gb, 2) << 3;
 266     bps = alac->sample_size - alac->extra_bits + channels - 1;
 267     if (bps > 32) {
 268         av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
 269         return AVERROR_PATCHWELCOME;
 270     }
 271
 272     /* whether the frame is compressed */
 273     is_compressed = !get_bits1(&alac->gb);
 274
 275     if (has_size)
 276         output_samples = get_bits_long(&alac->gb, 32);
 277     else
 278         output_samples = alac->max_samples_per_frame;
 279     if (!output_samples || output_samples > alac->max_samples_per_frame) {
 280         av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %d\n",
 281                output_samples);
 282         return AVERROR_INVALIDDATA;
 283     }
 284     if (!alac->nb_samples) {
 285         /* get output buffer */
 286         alac->frame.nb_samples = output_samples;
 287         if ((ret = ff_get_buffer(avctx, &alac->frame)) < 0) {
 288             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 289             return ret;
 290         }
 291     } else if (output_samples != alac->nb_samples) {
 292         av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %u != %d\n",
 293                output_samples, alac->nb_samples);
 294         return AVERROR_INVALIDDATA;
 295     }
 296     alac->nb_samples = output_samples;
 297     if (alac->sample_size > 16) {
 298         for (ch = 0; ch < channels; ch++)
 299             alac->output_samples_buffer[ch] = (int32_t *)alac->frame.extended_data[ch_index + ch];
 300     }
 301
 302     if (is_compressed) {
 303         int16_t lpc_coefs[2][32];
 304         int lpc_order[2];
 305         int prediction_type[2];
 306         int lpc_quant[2];
 307         int rice_history_mult[2];
 308
 309         decorr_shift       = get_bits(&alac->gb, 8);
 310         decorr_left_weight = get_bits(&alac->gb, 8);
 311
 312         for (ch = 0; ch < channels; ch++) {
 313             prediction_type[ch]   = get_bits(&alac->gb, 4);
 314             lpc_quant[ch]         = get_bits(&alac->gb, 4);
 315             rice_history_mult[ch] = get_bits(&alac->gb, 3);
 316             lpc_order[ch]         = get_bits(&alac->gb, 5);
 317
 318             /* read the predictor table */
 319             for (i = lpc_order[ch] - 1; i >= 0; i--)
 320                 lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
 321         }
 322
 323         if (alac->extra_bits) {
 324             for (i = 0; i < alac->nb_samples; i++) {
 325                 for (ch = 0; ch < channels; ch++)
 326                     alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
 327             }
 328         }
 329         for (ch = 0; ch < channels; ch++) {
 330             rice_decompress(alac, alac->predict_error_buffer[ch],
 331                             alac->nb_samples, bps,
 332                             rice_history_mult[ch] * alac->rice_history_mult / 4);
 333
 334             /* adaptive FIR filter */
 335             if (prediction_type[ch] == 15) {
 336                 /* Prediction type 15 runs the adaptive FIR twice.
 337                  * The first pass uses the special-case coef_num = 31, while
 338                  * the second pass uses the coefs from the bitstream.
 339                  *
 340                  * However, this prediction type is not currently used by the
 341                  * reference encoder.
 342                  */
 343                 lpc_prediction(alac->predict_error_buffer[ch],
 344                                alac->predict_error_buffer[ch],
 345                                alac->nb_samples, bps, NULL, 31, 0);
 346             } else if (prediction_type[ch] > 0) {
 347                 av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
 348                        prediction_type[ch]);
 349             }
 350             lpc_prediction(alac->predict_error_buffer[ch],
 351                            alac->output_samples_buffer[ch], alac->nb_samples,
 352                            bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]);
 353         }
 354     } else {
 355         /* not compressed, easy case */
 356         for (i = 0; i < alac->nb_samples; i++) {
 357             for (ch = 0; ch < channels; ch++) {
 358                 alac->output_samples_buffer[ch][i] =
 359                          get_sbits_long(&alac->gb, alac->sample_size);
 360             }
 361         }
 362         alac->extra_bits   = 0;
 363         decorr_shift       = 0;
 364         decorr_left_weight = 0;
 365     }
 366
 367     if (channels == 2 && decorr_left_weight) {
 368         decorrelate_stereo(alac->output_samples_buffer, alac->nb_samples,
 369                            decorr_shift, decorr_left_weight);
 370     }
 371
 372     if (alac->extra_bits) {
 373         append_extra_bits(alac->output_samples_buffer, alac->extra_bits_buffer,
 374                           alac->extra_bits, channels, alac->nb_samples);
 375     }
 376
 377     switch(alac->sample_size) {
 378     case 16: {
 379         for (ch = 0; ch < channels; ch++) {
 380             int16_t *outbuffer = (int16_t *)alac->frame.extended_data[ch_index + ch];
 381             for (i = 0; i < alac->nb_samples; i++)
 382                 *outbuffer++ = alac->output_samples_buffer[ch][i];
 383         }}
 384         break;
 385     case 24: {
 386         for (ch = 0; ch < channels; ch++) {
 387             for (i = 0; i < alac->nb_samples; i++)
 388                 alac->output_samples_buffer[ch][i] <<= 8;
 389         }}
 390         break;
 391     }
 392
 393     return 0;
 394 }
 395
 396 static int alac_decode_frame(AVCodecContext *avctx, void *data,
 397                              int *got_frame_ptr, AVPacket *avpkt)
 398 {
 399     ALACContext *alac = avctx->priv_data;
 400     enum AlacRawDataBlockType element;
 401     int channels;
 402     int ch, ret, got_end;
 403
 404     init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8);
 405
 406     got_end = 0;
 407     alac->nb_samples = 0;
 408     ch = 0;
 409     while (get_bits_left(&alac->gb) >= 3) {
 410         element = get_bits(&alac->gb, 3);
 411         if (element == TYPE_END) {
 412             got_end = 1;
 413             break;
 414         }
 415         if (element > TYPE_CPE && element != TYPE_LFE) {
 416             av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d", element);
 417             return AVERROR_PATCHWELCOME;
 418         }
 419
 420         channels = (element == TYPE_CPE) ? 2 : 1;
 421         if (ch + channels > alac->channels) {
 422             av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n");
 423             return AVERROR_INVALIDDATA;
 424         }
 425
 426         ret = decode_element(avctx, data,
 427                              ff_alac_channel_layout_offsets[alac->channels - 1][ch],
 428                              channels);
 429         if (ret < 0 && get_bits_left(&alac->gb))
 430             return ret;
 431
 432         ch += channels;
 433     }
 434     if (!got_end) {
 435         av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
 436         return AVERROR_INVALIDDATA;
 437     }
 438
 439     if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
 440         av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
 441                avpkt->size * 8 - get_bits_count(&alac->gb));
 442     }
 443
 444     *got_frame_ptr   = 1;
 445     *(AVFrame *)data = alac->frame;
 446
 447     return avpkt->size;
 448 }
 449
 450 static av_cold int alac_decode_close(AVCodecContext *avctx)
 451 {
 452     ALACContext *alac = avctx->priv_data;
 453
 454     int ch;
 455     for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
 456         av_freep(&alac->predict_error_buffer[ch]);
 457         if (alac->sample_size == 16)
 458             av_freep(&alac->output_samples_buffer[ch]);
 459         av_freep(&alac->extra_bits_buffer[ch]);
 460     }
 461
 462     return 0;
 463 }
 464
 465 static int allocate_buffers(ALACContext *alac)
 466 {
 467     int ch;
 468     int buf_size = alac->max_samples_per_frame * sizeof(int32_t);
 469
 470     for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
 471         FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
 472                          buf_size, buf_alloc_fail);
 473
 474         if (alac->sample_size == 16) {
 475             FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
 476                              buf_size, buf_alloc_fail);
 477         }
 478
 479         FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
 480                          buf_size, buf_alloc_fail);
 481     }
 482     return 0;
 483 buf_alloc_fail:
 484     alac_decode_close(alac->avctx);
 485     return AVERROR(ENOMEM);
 486 }
 487
 488 static int alac_set_info(ALACContext *alac)
 489 {
 490     GetByteContext gb;
 491
 492     bytestream2_init(&gb, alac->avctx->extradata,
 493                      alac->avctx->extradata_size);
 494
 495     bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
 496
 497     alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
 498     if (!alac->max_samples_per_frame || alac->max_samples_per_frame > INT_MAX) {
 499         av_log(alac->avctx, AV_LOG_ERROR, "max samples per frame invalid: %u\n",
 500                alac->max_samples_per_frame);
 501         return AVERROR_INVALIDDATA;
 502     }
 503     bytestream2_skipu(&gb, 1);  // compatible version
 504     alac->sample_size          = bytestream2_get_byteu(&gb);
 505     alac->rice_history_mult    = bytestream2_get_byteu(&gb);
 506     alac->rice_initial_history = bytestream2_get_byteu(&gb);
 507     alac->rice_limit           = bytestream2_get_byteu(&gb);
 508     alac->channels             = bytestream2_get_byteu(&gb);
 509     bytestream2_get_be16u(&gb); // maxRun
 510     bytestream2_get_be32u(&gb); // max coded frame size
 511     bytestream2_get_be32u(&gb); // average bitrate
 512     bytestream2_get_be32u(&gb); // samplerate
 513
 514     return 0;
 515 }
 516
 517 static av_cold int alac_decode_init(AVCodecContext * avctx)
 518 {
 519     int ret;
 520     ALACContext *alac = avctx->priv_data;
 521     alac->avctx = avctx;
 522
 523     /* initialize from the extradata */
 524     if (alac->avctx->extradata_size < ALAC_EXTRADATA_SIZE) {
 525         av_log(avctx, AV_LOG_ERROR, "alac: extradata is too small\n");
 526         return AVERROR_INVALIDDATA;
 527     }
 528     if (alac_set_info(alac)) {
 529         av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
 530         return -1;
 531     }
 532
 533     switch (alac->sample_size) {
 534     case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
 535              break;
 536     case 24:
 537     case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
 538              break;
 539     default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
 540                                    alac->sample_size);
 541              return AVERROR_PATCHWELCOME;
 542     }
 543     avctx->bits_per_raw_sample = alac->sample_size;
 544
 545     if (alac->channels < 1) {
 546         av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
 547         alac->channels = avctx->channels;
 548     } else {
 549         if (alac->channels > ALAC_MAX_CHANNELS)
 550             alac->channels = avctx->channels;
 551         else
 552             avctx->channels = alac->channels;
 553     }
 554     if (avctx->channels > ALAC_MAX_CHANNELS) {
 555         av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
 556                avctx->channels);
 557         return AVERROR_PATCHWELCOME;
 558     }
 559     avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
 560
 561     if ((ret = allocate_buffers(alac)) < 0) {
 562         av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
 563         return ret;
 564     }
 565
 566     avcodec_get_frame_defaults(&alac->frame);
 567     avctx->coded_frame = &alac->frame;
 568
 569     return 0;
 570 }
 571
 572 AVCodec ff_alac_decoder = {
 573     .name           = "alac",
 574     .type           = AVMEDIA_TYPE_AUDIO,
 575     .id             = AV_CODEC_ID_ALAC,
 576     .priv_data_size = sizeof(ALACContext),
 577     .init           = alac_decode_init,
 578     .close          = alac_decode_close,
 579     .decode         = alac_decode_frame,
 580     .capabilities   = CODEC_CAP_DR1,
 581     .long_name      = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
 582 };