matroskadec: parse available blocks even when cluster parsing failed

[ffmpeg] / libavformat / mov.c
diff --git a/libavformat/mov.c b/libavformat/mov.c

index aa4f02ec7f4bb791afe0993876d7e9d938edb52f..62efb0e8f4903437f38db3d56aaccabd4b42ea4d 100644 (file)
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -27,6 +27,8 @@
  #include "riff.h"
  #include "isom.h"
  #include "dv.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/mpegaudiodata.h"
  
  #ifdef CONFIG_ZLIB
  #include <zlib.h>
@@ -126,7 +128,7 @@ typedef struct MOVStreamContext {
      unsigned int bytes_per_frame;
      unsigned int samples_per_frame;
      int dv_audio_container;
-    int pseudo_stream_id;
+    int pseudo_stream_id; ///< -1 means demux all ids
      int16_t audio_cid; ///< stsd audio compression id
      unsigned drefs_count;
      MOV_dref_t *drefs;
@@ -153,7 +155,6 @@ typedef struct MOVContext {
  
  /* those functions parse an atom */
  /* return code:
-  1: found what I wanted, exit
    0: continue to parse next atom
   <0: error occurred, exit
  */
@@ -362,6 +363,14 @@ static int mp4_read_descr(MOVContext *c, ByteIOContext *pb, int *tag)
  #define MP4DecConfigDescrTag            0x04
  #define MP4DecSpecificDescrTag          0x05
  
+static const AVCodecTag mp4_audio_types[] = { /* cfg.object_type -> codec id, searched via codec_get_id() in mov_read_esds */
+    { CODEC_ID_MP3ON4, 29 }, /* old mp3on4 draft */
+    { CODEC_ID_MP3ON4, 32 }, /* layer 1 */
+    { CODEC_ID_MP3ON4, 33 }, /* layer 2 */
+    { CODEC_ID_MP3ON4, 34 }, /* layer 3 */
+    { CODEC_ID_NONE,    0 }, /* last entry; presumably the end marker codec_get_id() stops at - TODO confirm */
+};
+
  static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  {
      AVStream *st = c->fc->streams[c->fc->nb_streams-1];
@@ -395,9 +404,24 @@ static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  return AVERROR(ENOMEM);
              get_buffer(pb, st->codec->extradata, len);
              st->codec->extradata_size = len;
-            /* from mplayer */
-            if ((*st->codec->extradata >> 3) == 29) {
-                st->codec->codec_id = CODEC_ID_MP3ON4;
+            if (st->codec->codec_id == CODEC_ID_AAC) {
+                MPEG4AudioConfig cfg;
+                ff_mpeg4audio_get_config(&cfg, st->codec->extradata,
+                                         st->codec->extradata_size);
+                if (cfg.chan_config > 7)
+                    return -1;
+                st->codec->channels = ff_mpeg4audio_channels[cfg.chan_config];
+                if (cfg.object_type == 29 && cfg.sampling_index < 3) // old mp3on4
+                    st->codec->sample_rate = ff_mpa_freq_tab[cfg.sampling_index];
+                else
+                    st->codec->sample_rate = cfg.sample_rate; // ext sample rate ?
+                dprintf(c->fc, "mp4a config channels %d obj %d ext obj %d "
+                        "sample rate %d ext sample rate %d\n", st->codec->channels,
+                        cfg.object_type, cfg.ext_object_type,
+                        cfg.sample_rate, cfg.ext_sample_rate);
+                if (!(st->codec->codec_id = codec_get_id(mp4_audio_types,
+                                                         cfg.object_type)))
+                    st->codec->codec_id = CODEC_ID_AAC;
              }
          }
      }
@@ -451,7 +475,7 @@ static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      int lang;
  
      if (version > 1)
-        return 1; /* unsupported */
+        return -1; /* unsupported */
  
      get_be24(pb); /* flags */
      if (version == 1) {
@@ -641,16 +665,46 @@ static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      return 0;
  }
  
+/**
+ * Compute codec id for 'lpcm' tag from the sample size in bits (bps) and the lpcm format flags.
+ * See CoreAudioTypes and AudioStreamBasicDescription at Apple.
+ */
+static int mov_get_lpcm_codec_id(int bps, int flags)
+{
+    if (flags & 1) { // floating point
+        if (flags & 2) { // big endian
+            if      (bps == 32) return CODEC_ID_PCM_F32BE; // the only float layout mapped so far
+          //else if (bps == 64) return CODEC_ID_PCM_F64BE;
+        } else { // little endian float: nothing mapped, ends up at return 0 below
+          //if      (bps == 32) return CODEC_ID_PCM_F32LE;
+          //else if (bps == 64) return CODEC_ID_PCM_F64LE;
+        }
+    } else { // integer samples
+        if (flags & 2) { // big endian
+            if      (bps == 8)
+                // flag bit 4 set selects signed integer, otherwise the unsigned id is returned
+                if (flags & 4)  return CODEC_ID_PCM_S8;
+                else            return CODEC_ID_PCM_U8;
+            else if (bps == 16) return CODEC_ID_PCM_S16BE;
+            else if (bps == 24) return CODEC_ID_PCM_S24BE;
+            else if (bps == 32) return CODEC_ID_PCM_S32BE;
+        } else { // little endian
+            if      (bps == 8)
+                if (flags & 4)  return CODEC_ID_PCM_S8; // 8 bit mapping is the same in both endian branches
+                else            return CODEC_ID_PCM_U8;
+            else if (bps == 16) return CODEC_ID_PCM_S16LE;
+            else if (bps == 24) return CODEC_ID_PCM_S24LE;
+            else if (bps == 32) return CODEC_ID_PCM_S32LE;
+        }
+    }
+    return 0; // no codec id mapped for this bps/flags combination
+}
+
  static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  {
      AVStream *st = c->fc->streams[c->fc->nb_streams-1];
      MOVStreamContext *sc = st->priv_data;
-    int entries, frames_per_sample;
-    uint32_t format;
-    uint8_t codec_name[32];
-    unsigned int color_depth;
-    int color_greyscale;
-    int j, pseudo_stream_id;
+    int j, entries, pseudo_stream_id;
  
      get_byte(pb); /* version */
      get_be24(pb); /* flags */
@@ -664,23 +718,25 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          MOV_atom_t a = { 0, 0, 0 };
          offset_t start_pos = url_ftell(pb);
          int size = get_be32(pb); /* size */
-        format = get_le32(pb); /* data format */
+        uint32_t format = get_le32(pb); /* data format */
  
          get_be32(pb); /* reserved */
          get_be16(pb); /* reserved */
          dref_id = get_be16(pb);
  
          if (st->codec->codec_tag &&
+            st->codec->codec_tag != format &&
              (c->fc->video_codec_id ? codec_get_id(codec_movvideo_tags, format) != c->fc->video_codec_id
                                     : st->codec->codec_tag != MKTAG('j','p','e','g'))
             ){
              /* Multiple fourcc, we skip JPEG. This is not correct, we should
               * export it as a separate AVStream but this needs a few changes
               * in the MOV demuxer, patch welcome. */
+            av_log(c->fc, AV_LOG_WARNING, "multiple fourcc not supported\n");
              url_fskip(pb, size - (url_ftell(pb) - start_pos));
              continue;
          }
-        sc->pseudo_stream_id= pseudo_stream_id;
+        sc->pseudo_stream_id = st->codec->codec_tag ? -1 : pseudo_stream_id;
          sc->dref_id= dref_id;
  
          st->codec->codec_tag = format;
@@ -709,6 +765,10 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  (format >> 24) & 0xff, st->codec->codec_type);
  
          if(st->codec->codec_type==CODEC_TYPE_VIDEO) {
+            uint8_t codec_name[32];
+            unsigned int color_depth;
+            int color_greyscale;
+
              st->codec->codec_id = id;
              get_be16(pb); /* version */
              get_be16(pb); /* revision level */
@@ -722,11 +782,9 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
              get_be32(pb); /* horiz resolution */
              get_be32(pb); /* vert resolution */
              get_be32(pb); /* data size, always 0 */
-            frames_per_sample = get_be16(pb); /* frames per samples */
+            get_be16(pb); /* frames per samples */
  
-            dprintf(c->fc, "frames/samples = %d\n", frames_per_sample);
-
-            get_buffer(pb, codec_name, 32); /* codec name, pascal string (FIXME: true for mp4?) */
+            get_buffer(pb, codec_name, 32); /* codec name, pascal string */
              if (codec_name[0] <= 31) {
                  memcpy(st->codec->codec_name, &codec_name[1],codec_name[0]);
                  st->codec->codec_name[codec_name[0]] = 0;
@@ -809,7 +867,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
              } else
                  st->codec->palctrl = NULL;
          } else if(st->codec->codec_type==CODEC_TYPE_AUDIO) {
-            int bits_per_sample;
+            int bits_per_sample, flags;
              uint16_t version = get_be16(pb);
  
              st->codec->codec_id = id;
@@ -825,6 +883,28 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  
              st->codec->sample_rate = ((get_be32(pb) >> 16));
  
+            //Read QT version 1 fields. In version 0 these do not exist.
+            dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom);
+            if(!c->isom) {
+                if(version==1) {
+                    sc->samples_per_frame = get_be32(pb);
+                    get_be32(pb); /* bytes per packet */
+                    sc->bytes_per_frame = get_be32(pb);
+                    get_be32(pb); /* bytes per sample */
+                } else if(version==2) {
+                    get_be32(pb); /* sizeof struct only */
+                    st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */
+                    st->codec->channels = get_be32(pb);
+                    get_be32(pb); /* always 0x7F000000 */
+                    st->codec->bits_per_sample = get_be32(pb); /* bits per channel if sound is uncompressed */
+                    flags = get_be32(pb); /* lcpm format specific flag */
+                    sc->bytes_per_frame = get_be32(pb); /* bytes per audio packet if constant */
+                    sc->samples_per_frame = get_be32(pb); /* lpcm frames per audio packet if constant */
+                    if (format == MKTAG('l','p','c','m'))
+                        st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_sample, flags);
+                }
+            }
+
              switch (st->codec->codec_id) {
              case CODEC_ID_PCM_S8:
              case CODEC_ID_PCM_U8:
@@ -836,7 +916,9 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  if (st->codec->bits_per_sample == 8)
                      st->codec->codec_id = CODEC_ID_PCM_S8;
                  else if (st->codec->bits_per_sample == 24)
-                    st->codec->codec_id = CODEC_ID_PCM_S24BE;
+                    st->codec->codec_id =
+                        st->codec->codec_id == CODEC_ID_PCM_S16BE ?
+                        CODEC_ID_PCM_S24BE : CODEC_ID_PCM_S24LE;
                  break;
              /* set values for old format before stsd version 1 appeared */
              case CODEC_ID_MACE3:
@@ -859,26 +941,6 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  break;
              }
  
-            //Read QT version 1 fields. In version 0 these do not exist.
-            dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom);
-            if(!c->isom) {
-                if(version==1) {
-                    sc->samples_per_frame = get_be32(pb);
-                    get_be32(pb); /* bytes per packet */
-                    sc->bytes_per_frame = get_be32(pb);
-                    get_be32(pb); /* bytes per sample */
-                } else if(version==2) {
-                    get_be32(pb); /* sizeof struct only */
-                    st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */
-                    st->codec->channels = get_be32(pb);
-                    get_be32(pb); /* always 0x7F000000 */
-                    get_be32(pb); /* bits per channel if sound is uncompressed */
-                    get_be32(pb); /* lcpm format specific flag */
-                    get_be32(pb); /* bytes per audio packet if constant */
-                    get_be32(pb); /* lpcm frames per audio packet if constant */
-                }
-            }
-
              bits_per_sample = av_get_bits_per_sample(st->codec->codec_id);
              if (bits_per_sample) {
                  st->codec->bits_per_sample = bits_per_sample;
@@ -1102,7 +1164,7 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          int duration =get_be32(pb);
  
          if (duration < 0) {
-            av_log(c->fc, AV_LOG_ERROR, "negative ctts, ignoring\n");
+            av_log(c->fc, AV_LOG_WARNING, "negative ctts, ignoring\n");
              sc->ctts_count = 0;
              url_fskip(pb, 8 * (entries - i - 1));
              break;
@@ -1152,12 +1214,14 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
                          stss_index++;
                  }
                  sample_size = sc->sample_size > 0 ? sc->sample_size : sc->sample_sizes[current_sample];
-                dprintf(mov->fc, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", "
-                        "size %d, distance %d, keyframe %d\n", st->index, current_sample,
-                        current_offset, current_dts, sample_size, distance, keyframe);
-                if(sc->sample_to_chunk[stsc_index].id - 1 == sc->pseudo_stream_id)
+                if(sc->pseudo_stream_id == -1 ||
+                   sc->sample_to_chunk[stsc_index].id - 1 == sc->pseudo_stream_id) {
                      av_add_index_entry(st, current_offset, current_dts, sample_size, distance,
                                      keyframe ? AVINDEX_KEYFRAME : 0);
+                    dprintf(mov->fc, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", "
+                            "size %d, distance %d, keyframe %d\n", st->index, current_sample,
+                            current_offset, current_dts, sample_size, distance, keyframe);
+                }
                  current_offset += sample_size;
                  assert(sc->stts_data[stts_index].duration % sc->time_rate == 0);
                  current_dts += sc->stts_data[stts_index].duration / sc->time_rate;
@@ -1284,13 +1348,9 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          st->codec->width= 0; /* let decoder init width/height */
          st->codec->height= 0;
          break;
-#ifdef CONFIG_LIBFAAD
-    case CODEC_ID_AAC:
-#endif
  #ifdef CONFIG_VORBIS_DECODER
      case CODEC_ID_VORBIS:
  #endif
-    case CODEC_ID_MP3ON4:
          st->codec->sample_rate= 0; /* let decoder init parameters properly */
          break;
      }
@@ -1350,6 +1410,11 @@ static int mov_read_udta(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  
  static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  {
+    int i;
+    int width;
+    int height;
+    int64_t disp_transform[2];
+    int display_matrix[3][2];
      AVStream *st = c->fc->streams[c->fc->nb_streams-1];
      int version = get_byte(pb);
  
@@ -1381,12 +1446,36 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      get_be16(pb); /* volume */
      get_be16(pb); /* reserved */
  
-    url_fskip(pb, 36); /* display matrix */
-
-    /* those are fixed-point */
-    get_be32(pb); /* track width */
-    get_be32(pb); /* track height */
+    //read in the display matrix (outlined in ISO 14496-12, Section 6.2.2)
+    // they're kept in fixed point format through all calculations
+    // ignore u,v,z b/c we don't need the scale factor to calc aspect ratio
+    for (i = 0; i < 3; i++) {
+        display_matrix[i][0] = get_be32(pb);   // 16.16 fixed point
+        display_matrix[i][1] = get_be32(pb);   // 16.16 fixed point
+        get_be32(pb);           // 2.30 fixed point (not used)
+    }
  
+    width = get_be32(pb);       // 16.16 fixed point track width
+    height = get_be32(pb);      // 16.16 fixed point track height
+
+    //transform the display width/height according to the matrix
+    // skip this if the display matrix is the default identity matrix
+    // to keep the same scale, use [width height 1<<16]
+    if (width && height &&
+        (display_matrix[0][0] != 65536 || display_matrix[0][1]           ||
+        display_matrix[1][0]           || display_matrix[1][1] != 65536  ||
+        display_matrix[2][0]           || display_matrix[2][1])) {
+        for (i = 0; i < 2; i++)
+            disp_transform[i] =
+                (int64_t)  width  * display_matrix[0][i] +
+                (int64_t)  height * display_matrix[1][i] +
+                ((int64_t) display_matrix[2][i] << 16);
+
+        //sample aspect ratio is new width/height divided by old width/height
+        st->codec->sample_aspect_ratio = av_d2q(
+            ((double) disp_transform[0] * height) /
+            ((double) disp_transform[1] * width), INT_MAX);
+    }
      return 0;
  }
  
@@ -1544,7 +1633,7 @@ static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      uint8_t *cmov_data;
      uint8_t *moov_data; /* uncompressed data */
      long cmov_len, moov_len;
-    int ret;
+    int ret = -1;
  
      get_be32(pb); /* dcom atom */
      if (get_le32(pb) != MKTAG('d','c','o','m'))
@@ -1569,9 +1658,9 @@ static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      }
      get_buffer(pb, cmov_data, cmov_len);
      if(uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
-        return -1;
+        goto free_and_return;
      if(init_put_byte(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
-        return -1;
+        goto free_and_return;
      atom.type = MKTAG('m','o','o','v');
      atom.offset = 0;
      atom.size = moov_len;
@@ -1579,6 +1668,7 @@ static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  //    { int fd = open("/tmp/uncompheader.mov", O_WRONLY | O_CREAT); write(fd, moov_data, moov_len); close(fd); }
  #endif
      ret = mov_read_default(c, &ctx, atom);
+free_and_return:
      av_free(moov_data);
      av_free(cmov_data);
      return ret;
@@ -1612,6 +1702,7 @@ static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  }
  
  static const MOVParseTableEntry mov_default_parse_table[] = {
+{ MKTAG('a','v','s','s'), mov_read_extradata },
  { MKTAG('c','o','6','4'), mov_read_stco },
  { MKTAG('c','t','t','s'), mov_read_ctts }, /* composition time to sample */
  { MKTAG('d','i','n','f'), mov_read_default },
@@ -1676,6 +1767,7 @@ static int mov_probe(AVProbeData *p)
          case MKTAG('m','d','a','t'):
          case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */
          case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */
+        case MKTAG('f','t','y','p'):
              return AVPROBE_SCORE_MAX;
          /* those are more common words, so rate then a bit less */
          case MKTAG('e','d','i','w'): /* xdcam files have reverted first tags */
@@ -1684,8 +1776,7 @@ static int mov_probe(AVProbeData *p)
          case MKTAG('j','u','n','k'):
          case MKTAG('p','i','c','t'):
              return AVPROBE_SCORE_MAX - 5;
-        case MKTAG(0x82,0x82,0x7f,0x7d ):
-        case MKTAG('f','t','y','p'):
+        case MKTAG(0x82,0x82,0x7f,0x7d):
          case MKTAG('s','k','i','p'):
          case MKTAG('u','u','i','d'):
          case MKTAG('p','r','f','l'):
@@ -1795,6 +1886,10 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
              sc->sample_to_ctime_sample = 0;
          }
      } else {
+        AVStream *st = s->streams[sc->ffindex];
+        int64_t next_dts = (sc->current_sample < sc->sample_count) ?
+            st->index_entries[sc->current_sample].timestamp : st->duration;
+        pkt->duration = next_dts - pkt->dts;
          pkt->pts = pkt->dts;
      }
      pkt->flags |= sample->flags & AVINDEX_KEYFRAME ? PKT_FLAG_KEY : 0;
@@ -1888,7 +1983,7 @@ static int mov_read_close(AVFormatContext *s)
  
  AVInputFormat mov_demuxer = {
      "mov,mp4,m4a,3gp,3g2,mj2",
-    "QuickTime/MPEG4/Motion JPEG 2000 format",
+    NULL_IF_CONFIG_SMALL("QuickTime/MPEG-4/Motion JPEG 2000 format"),
      sizeof(MOVContext),
      mov_probe,
      mov_read_header,