Merge commit 'e46a6fb7732a7caef97a916a4f765ec0f779d195'

[ffmpeg] / libavcodec / atrac3.c
diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c

index ffd93e4946e70e15d32802c5969dfe92ee960b16..88ee9babcc790fcc6f7a9fef408b7c75b4486697 100644 (file)
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -48,6 +48,10 @@
  #include "atrac.h"
  #include "atrac3data.h"
  
+#define MIN_CHANNELS    1                   /* lowest channel count accepted by atrac3_decode_init() */
+#define MAX_CHANNELS    8                   /* highest channel count accepted by atrac3_decode_init() */
+#define MAX_JS_PAIRS    (MAX_CHANNELS / 2)  /* one joint-stereo pair per two channels; parenthesized so it expands as a single value */
+
  #define JOINT_STEREO    0x12
  #define SINGLE          0x2
  
@@ -90,10 +94,10 @@ typedef struct ATRAC3Context {
      //@}
      //@{
      /** joint-stereo related variables */
-    int matrix_coeff_index_prev[4];
-    int matrix_coeff_index_now[4];
-    int matrix_coeff_index_next[4];
-    int weighting_delay[6];
+    int matrix_coeff_index_prev[MAX_JS_PAIRS][4];
+    int matrix_coeff_index_now[MAX_JS_PAIRS][4];
+    int matrix_coeff_index_next[MAX_JS_PAIRS][4];
+    int weighting_delay[MAX_JS_PAIRS][6];
      //@}
      //@{
      /** data buffers */
@@ -577,7 +581,7 @@ static int decode_channel_sound_unit(ATRAC3Context *q, GetBitContext *gb,
      GainBlock *gain1 = &snd->gain_block[    snd->gc_blk_switch];
      GainBlock *gain2 = &snd->gain_block[1 - snd->gc_blk_switch];
  
-    if (coding_mode == JOINT_STEREO && channel_num == 1) {
+    if (coding_mode == JOINT_STEREO && (channel_num % 2) == 1) {
          if (get_bits(gb, 2) != 3) {
              av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
              return AVERROR_INVALIDDATA;
@@ -640,67 +644,83 @@ static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf,
                          float **out_samples)
  {
      ATRAC3Context *q = avctx->priv_data;
-    int ret, i;
+    int ret, i, ch;
      uint8_t *ptr1;
  
      if (q->coding_mode == JOINT_STEREO) {
          /* channel coupling mode */
-        /* decode Sound Unit 1 */
-        init_get_bits(&q->gb, databuf, avctx->block_align * 8);
  
-        ret = decode_channel_sound_unit(q, &q->gb, q->units, out_samples[0], 0,
-                                        JOINT_STEREO);
-        if (ret != 0)
-            return ret;
+        /* Decode sound unit pairs (channels are expected to be even).
+         * Multichannel joint stereo interleaves pairs (6ch: 2ch + 2ch + 2ch) */
+        const uint8_t *js_databuf;
+        int js_pair, js_block_align;
  
-        /* Framedata of the su2 in the joint-stereo mode is encoded in
-         * reverse byte order so we need to swap it first. */
-        if (databuf == q->decoded_bytes_buffer) {
-            uint8_t *ptr2 = q->decoded_bytes_buffer + avctx->block_align - 1;
-            ptr1          = q->decoded_bytes_buffer;
-            for (i = 0; i < avctx->block_align / 2; i++, ptr1++, ptr2--)
-                FFSWAP(uint8_t, *ptr1, *ptr2);
-        } else {
-            const uint8_t *ptr2 = databuf + avctx->block_align - 1;
-            for (i = 0; i < avctx->block_align; i++)
-                q->decoded_bytes_buffer[i] = *ptr2--;
-        }
+        js_block_align = (avctx->block_align / avctx->channels) * 2; /* block pair */
  
-        /* Skip the sync codes (0xF8). */
-        ptr1 = q->decoded_bytes_buffer;
-        for (i = 4; *ptr1 == 0xF8; i++, ptr1++) {
-            if (i >= avctx->block_align)
-                return AVERROR_INVALIDDATA;
-        }
+        for (ch = 0; ch < avctx->channels; ch = ch + 2) {
+            js_pair = ch/2;
+            js_databuf = databuf + js_pair * js_block_align; /* align to current pair */
+
+            /* Set the bitstream reader at the start of first channel sound unit. */
+            init_get_bits(&q->gb,
+                          js_databuf, js_block_align * 8);
  
+            /* decode Sound Unit 1 */
+            ret = decode_channel_sound_unit(q, &q->gb, &q->units[ch],
+                                            out_samples[ch], ch, JOINT_STEREO);
+            if (ret != 0)
+                return ret;
  
-        /* set the bitstream reader at the start of the second Sound Unit*/
-        init_get_bits8(&q->gb, ptr1, q->decoded_bytes_buffer + avctx->block_align - ptr1);
+            /* Framedata of the su2 in the joint-stereo mode is encoded in
+             * reverse byte order so we need to swap it first. */
+            if (js_databuf == q->decoded_bytes_buffer) {
+                uint8_t *ptr2 = q->decoded_bytes_buffer + js_block_align - 1;
+                ptr1          = q->decoded_bytes_buffer;
+                for (i = 0; i < js_block_align / 2; i++, ptr1++, ptr2--)
+                    FFSWAP(uint8_t, *ptr1, *ptr2);
+            } else {
+                const uint8_t *ptr2 = js_databuf + js_block_align - 1;
+                for (i = 0; i < js_block_align; i++)
+                    q->decoded_bytes_buffer[i] = *ptr2--;
+            }
  
-        /* Fill the Weighting coeffs delay buffer */
-        memmove(q->weighting_delay, &q->weighting_delay[2],
-                4 * sizeof(*q->weighting_delay));
-        q->weighting_delay[4] = get_bits1(&q->gb);
-        q->weighting_delay[5] = get_bits(&q->gb, 3);
+            /* Skip the sync codes (0xF8). */
+            ptr1 = q->decoded_bytes_buffer;
+            for (i = 4; *ptr1 == 0xF8; i++, ptr1++) {
+                if (i >= js_block_align)
+                    return AVERROR_INVALIDDATA;
+            }
  
-        for (i = 0; i < 4; i++) {
-            q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i];
-            q->matrix_coeff_index_now[i]  = q->matrix_coeff_index_next[i];
-            q->matrix_coeff_index_next[i] = get_bits(&q->gb, 2);
-        }
  
-        /* Decode Sound Unit 2. */
-        ret = decode_channel_sound_unit(q, &q->gb, &q->units[1],
-                                        out_samples[1], 1, JOINT_STEREO);
-        if (ret != 0)
-            return ret;
+            /* set the bitstream reader at the start of the second Sound Unit */
+            init_get_bits8(&q->gb,
+                           ptr1, q->decoded_bytes_buffer + js_block_align - ptr1);
  
-        /* Reconstruct the channel coefficients. */
-        reverse_matrixing(out_samples[0], out_samples[1],
-                          q->matrix_coeff_index_prev,
-                          q->matrix_coeff_index_now);
+            /* Fill the Weighting coeffs delay buffer */
+            memmove(q->weighting_delay[js_pair], &q->weighting_delay[js_pair][2],
+                    4 * sizeof(*q->weighting_delay[js_pair]));
+            q->weighting_delay[js_pair][4] = get_bits1(&q->gb);
+            q->weighting_delay[js_pair][5] = get_bits(&q->gb, 3);
  
-        channel_weighting(out_samples[0], out_samples[1], q->weighting_delay);
+            for (i = 0; i < 4; i++) {
+                q->matrix_coeff_index_prev[js_pair][i] = q->matrix_coeff_index_now[js_pair][i];
+                q->matrix_coeff_index_now[js_pair][i]  = q->matrix_coeff_index_next[js_pair][i];
+                q->matrix_coeff_index_next[js_pair][i] = get_bits(&q->gb, 2);
+            }
+
+            /* Decode Sound Unit 2. */
+            ret = decode_channel_sound_unit(q, &q->gb, &q->units[ch+1],
+                                            out_samples[ch+1], ch+1, JOINT_STEREO);
+            if (ret != 0)
+                return ret;
+
+            /* Reconstruct the channel coefficients. */
+            reverse_matrixing(out_samples[ch], out_samples[ch+1],
+                              q->matrix_coeff_index_prev[js_pair],
+                              q->matrix_coeff_index_now[js_pair]);
+
+            channel_weighting(out_samples[ch], out_samples[ch+1], q->weighting_delay[js_pair]);
+        }
      } else {
          /* single channels */
          /* Decode the channel sound units. */
@@ -731,6 +751,40 @@ static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf,
      return 0;
  }
  
+/**
+ * Decode one ATRAC3 AL packet into planar float output.
+ *
+ * A single bitstream reader is set up over the whole packet and the sound
+ * units of all channels are decoded from it one after another. After each
+ * unit the reader is advanced one bit at a time until the next 6 bits equal
+ * 0x28 or no more than 6 bits remain. The iQMF synthesis filter is then
+ * applied to every channel.
+ *
+ * @param avctx       codec context; priv_data is used as the ATRAC3Context
+ * @param databuf     packet payload
+ * @param size        payload size in bytes
+ * @param out_samples one output plane per channel
+ * @return 0 on success, the negative error from init_get_bits8() if the
+ *         reader cannot be set up, otherwise the non-zero value returned by
+ *         decode_channel_sound_unit()
+ */
+static int al_decode_frame(AVCodecContext *avctx, const uint8_t *databuf,
+                           int size, float **out_samples)
+{
+    ATRAC3Context *q = avctx->priv_data;
+    int ret, i;
+
+    /* Set the bitstream reader at the start of a channel sound unit.
+     * init_get_bits8() takes the size in bytes, so no "size * 8" is
+     * computed here, and a failed setup is reported instead of ignored. */
+    ret = init_get_bits8(&q->gb, databuf, size);
+    if (ret < 0)
+        return ret;
+
+    /* single channels */
+    /* Decode the channel sound units. */
+    for (i = 0; i < avctx->channels; i++) {
+        ret = decode_channel_sound_unit(q, &q->gb, &q->units[i],
+                                        out_samples[i], i, q->coding_mode);
+        if (ret != 0)
+            return ret;
+
+        /* Resynchronize on the next 0x28 pattern (presumably the 6-bit
+         * sound unit id expected by decode_channel_sound_unit() -- confirm).
+         * The loop index is always below avctx->channels at this point, so
+         * only the bitstream conditions need to be tested. */
+        while (get_bits_left(&q->gb) > 6 && show_bits(&q->gb, 6) != 0x28)
+            skip_bits(&q->gb, 1);
+    }
+
+    /* Apply the iQMF synthesis filter. */
+    for (i = 0; i < avctx->channels; i++) {
+        /* Four consecutive 256-sample regions of this channel's plane. */
+        float *p1 = out_samples[i];
+        float *p2 = p1 + 256;
+        float *p3 = p2 + 256;
+        float *p4 = p3 + 256;
+        /* Two 256-sample passes, then one 512-sample pass over their outputs. */
+        ff_atrac_iqmf(p1, p2, 256, p1, q->units[i].delay_buf1, q->temp_buf);
+        ff_atrac_iqmf(p4, p3, 256, p3, q->units[i].delay_buf2, q->temp_buf);
+        ff_atrac_iqmf(p1, p3, 512, p1, q->units[i].delay_buf3, q->temp_buf);
+    }
+
+    return 0;
+}
+
  static int atrac3_decode_frame(AVCodecContext *avctx, void *data,
                                 int *got_frame_ptr, AVPacket *avpkt)
  {
@@ -771,6 +825,28 @@ static int atrac3_decode_frame(AVCodecContext *avctx, void *data,
      return avctx->block_align;
  }
  
+/**
+ * Decode callback of the ATRAC3 AL decoder.
+ *
+ * Requests an output frame of SAMPLES_PER_FRAME samples and hands the whole
+ * packet to al_decode_frame().
+ *
+ * @param avctx         codec context
+ * @param data          output AVFrame
+ * @param got_frame_ptr set to 1 once a frame has been decoded
+ * @param avpkt         input packet
+ * @return avpkt->size on success; the negative value from ff_get_buffer()
+ *         or the non-zero value from al_decode_frame() on failure
+ */
+static int atrac3al_decode_frame(AVCodecContext *avctx, void *data,
+                                 int *got_frame_ptr, AVPacket *avpkt)
+{
+    AVFrame *frame = data;
+    float **planes;
+    int err;
+
+    /* Allocate the output buffers for one frame. */
+    frame->nb_samples = SAMPLES_PER_FRAME;
+    err = ff_get_buffer(avctx, frame, 0);
+    if (err < 0)
+        return err;
+
+    /* extended_data is read only after the buffer request above. */
+    planes = (float **)frame->extended_data;
+    err    = al_decode_frame(avctx, avpkt->data, avpkt->size, planes);
+    if (err != 0) {
+        av_log(avctx, AV_LOG_ERROR, "Frame decoding error!\n");
+        return err;
+    }
+
+    /* The whole packet is reported as consumed. */
+    *got_frame_ptr = 1;
+    return avpkt->size;
+}
+
  static av_cold void atrac3_init_static_data(void)
  {
      int i;
@@ -792,12 +868,12 @@ static av_cold void atrac3_init_static_data(void)
  static av_cold int atrac3_decode_init(AVCodecContext *avctx)
  {
      static int static_init_done;
-    int i, ret;
+    int i, js_pair, ret;
      int version, delay, samples_per_frame, frame_factor;
      const uint8_t *edata_ptr = avctx->extradata;
      ATRAC3Context *q = avctx->priv_data;
  
-    if (avctx->channels <= 0 || avctx->channels > 6) {
+    if (avctx->channels < MIN_CHANNELS || avctx->channels > MAX_CHANNELS) {
          av_log(avctx, AV_LOG_ERROR, "Channel configuration error!\n");
          return AVERROR(EINVAL);
      }
@@ -807,7 +883,12 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
      static_init_done = 1;
  
      /* Take care of the codec-specific extradata. */
-    if (avctx->extradata_size == 14) {
+    if (avctx->codec_id == AV_CODEC_ID_ATRAC3AL) {
+        version           = 4;
+        samples_per_frame = SAMPLES_PER_FRAME * avctx->channels;
+        delay             = 0x88E;
+        q->coding_mode    = SINGLE;
+    } else if (avctx->extradata_size == 14) {
          /* Parse the extradata, WAV format */
          av_log(avctx, AV_LOG_DEBUG, "[0-1] %d\n",
                 bytestream_get_le16(&edata_ptr));  // Unknown value always 1
@@ -870,8 +951,8 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
      if (q->coding_mode == SINGLE)
          av_log(avctx, AV_LOG_DEBUG, "Single channels detected.\n");
      else if (q->coding_mode == JOINT_STEREO) {
-        if (avctx->channels != 2) {
-            av_log(avctx, AV_LOG_ERROR, "Invalid coding mode\n");
+        if (avctx->channels % 2 == 1) { /* Joint stereo channels must be even */
+            av_log(avctx, AV_LOG_ERROR, "Invalid joint stereo channel configuration.\n");
              return AVERROR_INVALIDDATA;
          }
          av_log(avctx, AV_LOG_DEBUG, "Joint stereo detected.\n");
@@ -899,17 +980,19 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
      }
  
      /* init the joint-stereo decoding data */
-    q->weighting_delay[0] = 0;
-    q->weighting_delay[1] = 7;
-    q->weighting_delay[2] = 0;
-    q->weighting_delay[3] = 7;
-    q->weighting_delay[4] = 0;
-    q->weighting_delay[5] = 7;
-
-    for (i = 0; i < 4; i++) {
-        q->matrix_coeff_index_prev[i] = 3;
-        q->matrix_coeff_index_now[i]  = 3;
-        q->matrix_coeff_index_next[i] = 3;
+    for (js_pair = 0; js_pair < MAX_JS_PAIRS; js_pair++) {
+        q->weighting_delay[js_pair][0] = 0;
+        q->weighting_delay[js_pair][1] = 7;
+        q->weighting_delay[js_pair][2] = 0;
+        q->weighting_delay[js_pair][3] = 7;
+        q->weighting_delay[js_pair][4] = 0;
+        q->weighting_delay[js_pair][5] = 7;
+
+        for (i = 0; i < 4; i++) {
+            q->matrix_coeff_index_prev[js_pair][i] = 3;
+            q->matrix_coeff_index_now[js_pair][i]  = 3;
+            q->matrix_coeff_index_next[js_pair][i] = 3;
+        }
      }
  
      ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3);
@@ -937,3 +1020,17 @@ AVCodec ff_atrac3_decoder = {
      .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                          AV_SAMPLE_FMT_NONE },
  };
+
+AVCodec ff_atrac3al_decoder = { /* codec table entry for the ATRAC3 AL decoder */
+    .name             = "atrac3al",
+    .long_name        = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"),
+    .type             = AVMEDIA_TYPE_AUDIO,
+    .id               = AV_CODEC_ID_ATRAC3AL,
+    .priv_data_size   = sizeof(ATRAC3Context),
+    .init             = atrac3_decode_init,    /* takes its AV_CODEC_ID_ATRAC3AL branch instead of parsing extradata */
+    .close            = atrac3_decode_close,
+    .decode           = atrac3al_decode_frame, /* AL entry point; returns avpkt->size on success */
+    .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, /* list terminated by AV_SAMPLE_FMT_NONE */
+                                                        AV_SAMPLE_FMT_NONE },
+};