git.sesse.net Git - ffmpeg/blob - libavcodec/alac.c

   1 /*
   2  * ALAC (Apple Lossless Audio Codec) decoder
   3  * Copyright (c) 2005 David Hammerton
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * ALAC (Apple Lossless Audio Codec) decoder
  25  * @author 2005 David Hammerton
  26  * @see http://crazney.net/programs/itunes/alac.html
  27  *
  28  * Note: This decoder expects a 36-byte QuickTime atom to be
  29  * passed through the extradata[_size] fields. This atom is tacked onto
  30  * the end of an 'alac' stsd atom and has the following format:
  31  *
  32  * 32bit  atom size
  33  * 32bit  tag                  ("alac")
  34  * 32bit  tag version          (0)
  35  * 32bit  samples per frame    (used when not set explicitly in the frames)
  36  *  8bit  compatible version   (0)
  37  *  8bit  sample size
  38  *  8bit  history mult         (40)
  39  *  8bit  initial history      (14)
  40  *  8bit  rice param limit     (10)
  41  *  8bit  channels
  42  * 16bit  maxRun               (255)
  43  * 32bit  max coded frame size (0 means unknown)
  44  * 32bit  average bitrate      (0 means unknown)
  45  * 32bit  samplerate
  46  */
  47
  48
  49 #include "avcodec.h"
  50 #include "get_bits.h"
  51 #include "bytestream.h"
  52 #include "unary.h"
  53 #include "mathops.h"
  54
  55 #define ALAC_EXTRADATA_SIZE 36
  56 #define MAX_CHANNELS 2
  57
  58 typedef struct {
  59
  60     AVCodecContext *avctx;
  61     AVFrame frame;
  62     GetBitContext gb;
  63
  64     int channels;
  65
  66     /* buffers */
  67     int32_t *predict_error_buffer[MAX_CHANNELS];
  68     int32_t *output_samples_buffer[MAX_CHANNELS];
  69     int32_t *extra_bits_buffer[MAX_CHANNELS];
  70
  71     uint32_t max_samples_per_frame;
  72     uint8_t  sample_size;
  73     uint8_t  rice_history_mult;
  74     uint8_t  rice_initial_history;
  75     uint8_t  rice_limit;
  76
  77     int extra_bits;                         /**< number of extra bits beyond 16-bit */
  78 } ALACContext;
  79
  80 static inline int decode_scalar(GetBitContext *gb, int k, int readsamplesize)
  81 {
  82     int x = get_unary_0_9(gb);
  83
  84     if (x > 8) { /* RICE THRESHOLD */
  85         /* use alternative encoding */
  86         x = get_bits(gb, readsamplesize);
  87     } else if (k != 1) {
  88         int extrabits = show_bits(gb, k);
  89
  90         /* multiply x by 2^k - 1, as part of their strange algorithm */
  91         x = (x << k) - x;
  92
  93         if (extrabits > 1) {
  94             x += extrabits - 1;
  95             skip_bits(gb, k);
  96         } else
  97             skip_bits(gb, k - 1);
  98     }
  99     return x;
 100 }
 101
 102 static void bastardized_rice_decompress(ALACContext *alac,
 103                                         int32_t *output_buffer,
 104                                         int output_size,
 105                                         int readsamplesize,
 106                                         int rice_history_mult)
 107 {
 108     int output_count;
 109     unsigned int history = alac->rice_initial_history;
 110     int sign_modifier = 0;
 111
 112     for (output_count = 0; output_count < output_size; output_count++) {
 113         int x, k;
 114
 115         /* read k, that is bits as is */
 116         k = av_log2((history >> 9) + 3);
 117         k = FFMIN(k, alac->rice_limit);
 118         x = decode_scalar(&alac->gb, k, readsamplesize);
 119         x += sign_modifier;
 120         sign_modifier = 0;
 121
 122         output_buffer[output_count] = (x >> 1) ^ -(x & 1);
 123
 124         /* now update the history */
 125         if (x > 0xffff)
 126             history = 0xffff;
 127         else
 128             history +=         x * rice_history_mult -
 129                        ((history * rice_history_mult) >> 9);
 130
 131         /* special case: there may be compressed blocks of 0 */
 132         if ((history < 128) && (output_count+1 < output_size)) {
 133             int block_size;
 134
 135             k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */);
 136             k = FFMIN(k, alac->rice_limit);
 137
 138             block_size = decode_scalar(&alac->gb, k, 16);
 139
 140             if (block_size > 0) {
 141                 if(block_size >= output_size - output_count){
 142                     av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count);
 143                     block_size= output_size - output_count - 1;
 144                 }
 145                 memset(&output_buffer[output_count + 1], 0,
 146                        block_size * sizeof(*output_buffer));
 147                 output_count += block_size;
 148             }
 149
 150             if (block_size <= 0xffff)
 151                 sign_modifier = 1;
 152
 153             history = 0;
 154         }
 155     }
 156 }
 157
 /**
  * Reduce a value to its sign.
  *
  * @return 0 when v is zero, otherwise FFSIGN(v)
  */
 static inline int sign_only(int v)
 {
     if (!v)
         return 0;

     return FFSIGN(v);
 }
 162
 /**
  * Reconstruct samples from prediction residuals with an adaptive FIR filter.
  *
  * @param error_buffer             residuals, output_size entries
  * @param buffer_out               destination, output_size entries; may be the
  *                                 same array as error_buffer for the
  *                                 predictor_coef_num == 31 case
  * @param output_size              number of samples to reconstruct
  * @param readsamplesize           bit width results are sign-extended from
  * @param predictor_coef_table     filter coefficients, adapted in place; not
  *                                 accessed when predictor_coef_num is 0 or 31
  * @param predictor_coef_num       number of coefficients; 0 copies the
  *                                 residuals, 31 selects plain first-order
  *                                 prediction
  * @param predictor_quantitization right shift applied to the filter sum
  */
 static void predictor_decompress_fir_adapt(int32_t *error_buffer,
                                            int32_t *buffer_out,
                                            int output_size,
                                            int readsamplesize,
                                            int16_t *predictor_coef_table,
                                            int predictor_coef_num,
                                            int predictor_quantitization)
 {
     int i;
     int rounding;

     /* nothing to do, and element 0 must not be touched, for empty input */
     if (output_size <= 0)
         return;

     /* first sample always copies */
     *buffer_out = *error_buffer;

     if (output_size == 1)
         return;

     if (!predictor_coef_num) {
         memcpy(&buffer_out[1], &error_buffer[1],
                (output_size - 1) * sizeof(*buffer_out));
         return;
     }

     if (predictor_coef_num == 31) {
         /* simple 1st-order prediction */
         for (i = 1; i < output_size; i++) {
             buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                         readsamplesize);
         }
         return;
     }

     /* read warm-up samples; bounded by output_size so that frames shorter
      * than the filter order do not write past the end of the buffers */
     for (i = 1; i <= predictor_coef_num && i < output_size; i++) {
         buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                     readsamplesize);
     }

     /* rounding offset for the quantization shift; a quantizer of 0 needs no
      * rounding and must not be turned into a shift by -1 */
     rounding = predictor_quantitization > 0 ?
                1 << (predictor_quantitization - 1) : 0;

     /* NOTE: 4 and 8 are very common cases that could be optimized. */

     /* general case: buffer_out slides forward by one sample per iteration,
      * so buffer_out[0] is always the oldest sample of the filter window */
     for (i = predictor_coef_num + 1; i < output_size; i++) {
         int j;
         int val = 0;
         int error_val = error_buffer[i];
         int error_sign;

         for (j = 0; j < predictor_coef_num; j++) {
             val += (buffer_out[predictor_coef_num - j] - buffer_out[0]) *
                    predictor_coef_table[j];
         }

         val = (val + rounding) >> predictor_quantitization;
         val += buffer_out[0] + error_val;

         buffer_out[predictor_coef_num + 1] = sign_extend(val, readsamplesize);

         /* adapt LPC coefficients */
         error_sign = sign_only(error_val);
         if (error_sign) {
             for (j = predictor_coef_num - 1; j >= 0 && error_val * error_sign > 0; j--) {
                 int sign;
                 val  = buffer_out[0] - buffer_out[predictor_coef_num - j];
                 sign = sign_only(val) * error_sign;
                 predictor_coef_table[j] -= sign;
                 val *= sign;
                 error_val -= ((val >> predictor_quantitization) *
                               (predictor_coef_num - j));
             }
         }

         buffer_out++;
     }
 }
 239
 240 static void decorrelate_stereo(int32_t *buffer[MAX_CHANNELS],
 241                                int numsamples, uint8_t interlacing_shift,
 242                                uint8_t interlacing_leftweight)
 243 {
 244     int i;
 245
 246     for (i = 0; i < numsamples; i++) {
 247         int32_t a, b;
 248
 249         a = buffer[0][i];
 250         b = buffer[1][i];
 251
 252         a -= (b * interlacing_leftweight) >> interlacing_shift;
 253         b += a;
 254
 255         buffer[0][i] = b;
 256         buffer[1][i] = a;
 257     }
 258 }
 259
 260 static void append_extra_bits(int32_t *buffer[MAX_CHANNELS],
 261                               int32_t *extra_bits_buffer[MAX_CHANNELS],
 262                               int extra_bits, int numchannels, int numsamples)
 263 {
 264     int i, ch;
 265
 266     for (ch = 0; ch < numchannels; ch++)
 267         for (i = 0; i < numsamples; i++)
 268             buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
 269 }
 270
 271 static void interleave_stereo_16(int32_t *buffer[MAX_CHANNELS],
 272                                  int16_t *buffer_out, int numsamples)
 273 {
 274     int i;
 275
 276     for (i = 0; i < numsamples; i++) {
 277         *buffer_out++ = buffer[0][i];
 278         *buffer_out++ = buffer[1][i];
 279     }
 280 }
 281
 282 static void interleave_stereo_24(int32_t *buffer[MAX_CHANNELS],
 283                                  int32_t *buffer_out, int numsamples)
 284 {
 285     int i;
 286
 287     for (i = 0; i < numsamples; i++) {
 288         *buffer_out++ = buffer[0][i] << 8;
 289         *buffer_out++ = buffer[1][i] << 8;
 290     }
 291 }
 292
 293 static int alac_decode_frame(AVCodecContext *avctx, void *data,
 294                              int *got_frame_ptr, AVPacket *avpkt)
 295 {
 296     const uint8_t *inbuffer = avpkt->data;
 297     int input_buffer_size = avpkt->size;
 298     ALACContext *alac = avctx->priv_data;
 299
 300     int channels;
 301     unsigned int outputsamples;
 302     int hassize;
 303     unsigned int readsamplesize;
 304     int isnotcompressed;
 305     uint8_t interlacing_shift;
 306     uint8_t interlacing_leftweight;
 307     int i, ch, ret;
 308
 309     init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);
 310
 311     channels = get_bits(&alac->gb, 3) + 1;
 312     if (channels != avctx->channels) {
 313         av_log(avctx, AV_LOG_ERROR, "frame header channel count mismatch\n");
 314         return AVERROR_INVALIDDATA;
 315     }
 316
 317     skip_bits(&alac->gb, 4);  /* element instance tag */
 318     skip_bits(&alac->gb, 12); /* unused header bits */
 319
 320     /* the number of output samples is stored in the frame */
 321     hassize = get_bits1(&alac->gb);
 322
 323     alac->extra_bits = get_bits(&alac->gb, 2) << 3;
 324
 325     /* whether the frame is compressed */
 326     isnotcompressed = get_bits1(&alac->gb);
 327
 328     if (hassize) {
 329         /* now read the number of samples as a 32bit integer */
 330         outputsamples = get_bits_long(&alac->gb, 32);
 331         if (outputsamples > alac->max_samples_per_frame) {
 332             av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n",
 333                    outputsamples, alac->max_samples_per_frame);
 334             return -1;
 335         }
 336     } else
 337         outputsamples = alac->max_samples_per_frame;
 338
 339     /* get output buffer */
 340     if (outputsamples > INT32_MAX) {
 341         av_log(avctx, AV_LOG_ERROR, "unsupported block size: %u\n", outputsamples);
 342         return AVERROR_INVALIDDATA;
 343     }
 344     alac->frame.nb_samples = outputsamples;
 345     if ((ret = avctx->get_buffer(avctx, &alac->frame)) < 0) {
 346         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 347         return ret;
 348     }
 349
 350     readsamplesize = alac->sample_size - alac->extra_bits + channels - 1;
 351     if (readsamplesize > MIN_CACHE_BITS) {
 352         av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize);
 353         return -1;
 354     }
 355
 356     if (!isnotcompressed) {
 357         /* so it is compressed */
 358         int16_t predictor_coef_table[MAX_CHANNELS][32];
 359         int predictor_coef_num[MAX_CHANNELS];
 360         int prediction_type[MAX_CHANNELS];
 361         int prediction_quantitization[MAX_CHANNELS];
 362         int ricemodifier[MAX_CHANNELS];
 363
 364         interlacing_shift = get_bits(&alac->gb, 8);
 365         interlacing_leftweight = get_bits(&alac->gb, 8);
 366
 367         for (ch = 0; ch < channels; ch++) {
 368             prediction_type[ch] = get_bits(&alac->gb, 4);
 369             prediction_quantitization[ch] = get_bits(&alac->gb, 4);
 370
 371             ricemodifier[ch] = get_bits(&alac->gb, 3);
 372             predictor_coef_num[ch] = get_bits(&alac->gb, 5);
 373
 374             /* read the predictor table */
 375             for (i = 0; i < predictor_coef_num[ch]; i++)
 376                 predictor_coef_table[ch][i] = (int16_t)get_bits(&alac->gb, 16);
 377         }
 378
 379         if (alac->extra_bits) {
 380             for (i = 0; i < outputsamples; i++) {
 381                 for (ch = 0; ch < channels; ch++)
 382                     alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
 383             }
 384         }
 385         for (ch = 0; ch < channels; ch++) {
 386             bastardized_rice_decompress(alac,
 387                                         alac->predict_error_buffer[ch],
 388                                         outputsamples,
 389                                         readsamplesize,
 390                                         ricemodifier[ch] * alac->rice_history_mult / 4);
 391
 392             /* adaptive FIR filter */
 393             if (prediction_type[ch] == 15) {
 394                 /* Prediction type 15 runs the adaptive FIR twice.
 395                  * The first pass uses the special-case coef_num = 31, while
 396                  * the second pass uses the coefs from the bitstream.
 397                  *
 398                  * However, this prediction type is not currently used by the
 399                  * reference encoder.
 400                  */
 401                 predictor_decompress_fir_adapt(alac->predict_error_buffer[ch],
 402                                                alac->predict_error_buffer[ch],
 403                                                outputsamples, readsamplesize,
 404                                                NULL, 31, 0);
 405             } else if (prediction_type[ch] > 0) {
 406                 av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
 407                        prediction_type[ch]);
 408             }
 409             predictor_decompress_fir_adapt(alac->predict_error_buffer[ch],
 410                                            alac->output_samples_buffer[ch],
 411                                            outputsamples, readsamplesize,
 412                                            predictor_coef_table[ch],
 413                                            predictor_coef_num[ch],
 414                                            prediction_quantitization[ch]);
 415         }
 416     } else {
 417         /* not compressed, easy case */
 418         for (i = 0; i < outputsamples; i++) {
 419             for (ch = 0; ch < channels; ch++) {
 420                 alac->output_samples_buffer[ch][i] = get_sbits_long(&alac->gb,
 421                                                                     alac->sample_size);
 422             }
 423         }
 424         alac->extra_bits = 0;
 425         interlacing_shift = 0;
 426         interlacing_leftweight = 0;
 427     }
 428     if (get_bits(&alac->gb, 3) != 7)
 429         av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n");
 430
 431     if (channels == 2 && interlacing_leftweight) {
 432         decorrelate_stereo(alac->output_samples_buffer, outputsamples,
 433                            interlacing_shift, interlacing_leftweight);
 434     }
 435
 436     if (alac->extra_bits) {
 437         append_extra_bits(alac->output_samples_buffer, alac->extra_bits_buffer,
 438                           alac->extra_bits, alac->channels, outputsamples);
 439     }
 440
 441     switch(alac->sample_size) {
 442     case 16:
 443         if (channels == 2) {
 444             interleave_stereo_16(alac->output_samples_buffer,
 445                                  (int16_t *)alac->frame.data[0], outputsamples);
 446         } else {
 447             int16_t *outbuffer = (int16_t *)alac->frame.data[0];
 448             for (i = 0; i < outputsamples; i++) {
 449                 outbuffer[i] = alac->output_samples_buffer[0][i];
 450             }
 451         }
 452         break;
 453     case 24:
 454         if (channels == 2) {
 455             interleave_stereo_24(alac->output_samples_buffer,
 456                                  (int32_t *)alac->frame.data[0], outputsamples);
 457         } else {
 458             int32_t *outbuffer = (int32_t *)alac->frame.data[0];
 459             for (i = 0; i < outputsamples; i++)
 460                 outbuffer[i] = alac->output_samples_buffer[0][i] << 8;
 461         }
 462         break;
 463     }
 464
 465     if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8)
 466         av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb));
 467
 468     *got_frame_ptr   = 1;
 469     *(AVFrame *)data = alac->frame;
 470
 471     return input_buffer_size;
 472 }
 473
 474 static av_cold int alac_decode_close(AVCodecContext *avctx)
 475 {
 476     ALACContext *alac = avctx->priv_data;
 477
 478     int ch;
 479     for (ch = 0; ch < alac->channels; ch++) {
 480         av_freep(&alac->predict_error_buffer[ch]);
 481         av_freep(&alac->output_samples_buffer[ch]);
 482         av_freep(&alac->extra_bits_buffer[ch]);
 483     }
 484
 485     return 0;
 486 }
 487
 488 static int allocate_buffers(ALACContext *alac)
 489 {
 490     int ch;
 491     for (ch = 0; ch < alac->channels; ch++) {
 492         int buf_size = alac->max_samples_per_frame * sizeof(int32_t);
 493
 494         FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
 495                          buf_size, buf_alloc_fail);
 496
 497         FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
 498                          buf_size, buf_alloc_fail);
 499
 500         FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
 501                          buf_size, buf_alloc_fail);
 502     }
 503     return 0;
 504 buf_alloc_fail:
 505     alac_decode_close(alac->avctx);
 506     return AVERROR(ENOMEM);
 507 }
 508
 509 static int alac_set_info(ALACContext *alac)
 510 {
 511     GetByteContext gb;
 512
 513     bytestream2_init(&gb, alac->avctx->extradata,
 514                      alac->avctx->extradata_size);
 515
 516     bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
 517
 518     alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
 519     if (alac->max_samples_per_frame >= UINT_MAX/4){
 520         av_log(alac->avctx, AV_LOG_ERROR,
 521                "max_samples_per_frame too large\n");
 522         return AVERROR_INVALIDDATA;
 523     }
 524     bytestream2_skipu(&gb, 1);  // compatible version
 525     alac->sample_size          = bytestream2_get_byteu(&gb);
 526     alac->rice_history_mult    = bytestream2_get_byteu(&gb);
 527     alac->rice_initial_history = bytestream2_get_byteu(&gb);
 528     alac->rice_limit           = bytestream2_get_byteu(&gb);
 529     alac->channels             = bytestream2_get_byteu(&gb);
 530     bytestream2_get_be16u(&gb); // maxRun
 531     bytestream2_get_be32u(&gb); // max coded frame size
 532     bytestream2_get_be32u(&gb); // average bitrate
 533     bytestream2_get_be32u(&gb); // samplerate
 534
 535     return 0;
 536 }
 537
 538 static av_cold int alac_decode_init(AVCodecContext * avctx)
 539 {
 540     int ret;
 541     ALACContext *alac = avctx->priv_data;
 542     alac->avctx = avctx;
 543
 544     /* initialize from the extradata */
 545     if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
 546         av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
 547             ALAC_EXTRADATA_SIZE);
 548         return -1;
 549     }
 550     if (alac_set_info(alac)) {
 551         av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
 552         return -1;
 553     }
 554
 555     switch (alac->sample_size) {
 556     case 16: avctx->sample_fmt    = AV_SAMPLE_FMT_S16;
 557              break;
 558     case 24: avctx->sample_fmt    = AV_SAMPLE_FMT_S32;
 559              break;
 560     default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
 561                                    alac->sample_size);
 562              return AVERROR_PATCHWELCOME;
 563     }
 564
 565     if (alac->channels < 1) {
 566         av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
 567         alac->channels = avctx->channels;
 568     } else {
 569         if (alac->channels > MAX_CHANNELS)
 570             alac->channels = avctx->channels;
 571         else
 572             avctx->channels = alac->channels;
 573     }
 574     if (avctx->channels > MAX_CHANNELS) {
 575         av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
 576                avctx->channels);
 577         return AVERROR_PATCHWELCOME;
 578     }
 579
 580     if ((ret = allocate_buffers(alac)) < 0) {
 581         av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
 582         return ret;
 583     }
 584
 585     avcodec_get_frame_defaults(&alac->frame);
 586     avctx->coded_frame = &alac->frame;
 587
 588     return 0;
 589 }
 590
 591 AVCodec ff_alac_decoder = {
 592     .name           = "alac",
 593     .type           = AVMEDIA_TYPE_AUDIO,
 594     .id             = CODEC_ID_ALAC,
 595     .priv_data_size = sizeof(ALACContext),
 596     .init           = alac_decode_init,
 597     .close          = alac_decode_close,
 598     .decode         = alac_decode_frame,
 599     .capabilities   = CODEC_CAP_DR1,
 600     .long_name      = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
 601 };