revert r14983, value is not sample aspect ratio, it is display aspect ratio

[ffmpeg] / libavformat / mov.c
diff --git a/libavformat/mov.c b/libavformat/mov.c

index 76c25a3e4d6de9d22f1d54d7f99f8bbabb3a0762..73ed11ddade5c5093865d1f084d6bb2bf7debfd0 100644 (file)
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -565,6 +565,12 @@ static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          case CODEC_ID_PCM_S32BE:
              st->codec->codec_id = CODEC_ID_PCM_S32LE;
              break;
+        case CODEC_ID_PCM_F32BE:
+            st->codec->codec_id = CODEC_ID_PCM_F32LE;
+            break;
+        case CODEC_ID_PCM_F64BE:
+            st->codec->codec_id = CODEC_ID_PCM_F64LE;
+            break;
          default:
              break;
          }
@@ -665,6 +671,41 @@ static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      return 0;
  }
  
+/**
+ * Map the parameters of an 'lpcm' sample description to a PCM codec id.
+ * Flag bits are those described by CoreAudioTypes and
+ * AudioStreamBasicDescription at Apple.
+ *
+ * @param bps   bits per sample
+ * @param flags lpcm format flags: 1 = floating point, 2 = big endian,
+ *              4 = signed integer (only consulted for 8-bit samples)
+ * @return the matching CODEC_ID_PCM_* value, or 0 if nothing matches
+ */
+static int mov_get_lpcm_codec_id(int bps, int flags)
+{
+    const int is_float  = flags & 1;
+    const int big       = flags & 2;
+    const int is_signed = flags & 4;
+
+    if (is_float) {
+        switch (bps) {
+        case 32: return big ? CODEC_ID_PCM_F32BE : CODEC_ID_PCM_F32LE;
+        case 64: return big ? CODEC_ID_PCM_F64BE : CODEC_ID_PCM_F64LE;
+        default: return 0;
+        }
+    }
+
+    switch (bps) {
+    case  8: // byte order is irrelevant for single-byte samples
+        return is_signed ? CODEC_ID_PCM_S8 : CODEC_ID_PCM_U8;
+    case 16: return big ? CODEC_ID_PCM_S16BE : CODEC_ID_PCM_S16LE;
+    case 24: return big ? CODEC_ID_PCM_S24BE : CODEC_ID_PCM_S24LE;
+    case 32: return big ? CODEC_ID_PCM_S32BE : CODEC_ID_PCM_S32LE;
+    default: break;
+    }
+    return 0;
+}
+
  static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  {
      AVStream *st = c->fc->streams[c->fc->nb_streams-1];
@@ -755,13 +796,13 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  st->codec->codec_name[codec_name[0]] = 0;
              }
  
-            st->codec->bits_per_sample = get_be16(pb); /* depth */
+            st->codec->bits_per_coded_sample = get_be16(pb); /* depth */
              st->codec->color_table_id = get_be16(pb); /* colortable id */
              dprintf(c->fc, "depth %d, ctab id %d\n",
-                   st->codec->bits_per_sample, st->codec->color_table_id);
+                   st->codec->bits_per_coded_sample, st->codec->color_table_id);
              /* figure out the palette situation */
-            color_depth = st->codec->bits_per_sample & 0x1F;
-            color_greyscale = st->codec->bits_per_sample & 0x20;
+            color_depth = st->codec->bits_per_coded_sample & 0x1F;
+            color_greyscale = st->codec->bits_per_coded_sample & 0x20;
  
              /* if the depth is 2, 4, or 8 bpp, file is palettized */
              if ((color_depth == 2) || (color_depth == 4) ||
@@ -773,7 +814,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  if (color_greyscale) {
                      int color_index, color_dec;
                      /* compute the greyscale palette */
-                    st->codec->bits_per_sample = color_depth;
+                    st->codec->bits_per_coded_sample = color_depth;
                      color_count = 1 << color_depth;
                      color_index = 255;
                      color_dec = 256 / (color_count - 1);
@@ -832,7 +873,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
              } else
                  st->codec->palctrl = NULL;
          } else if(st->codec->codec_type==CODEC_TYPE_AUDIO) {
-            int bits_per_sample;
+            int bits_per_sample, flags;
              uint16_t version = get_be16(pb);
  
              st->codec->codec_id = id;
@@ -841,25 +882,49 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  
              st->codec->channels = get_be16(pb);             /* channel count */
              dprintf(c->fc, "audio channels %d\n", st->codec->channels);
-            st->codec->bits_per_sample = get_be16(pb);      /* sample size */
+            st->codec->bits_per_coded_sample = get_be16(pb);      /* sample size */
  
              sc->audio_cid = get_be16(pb);
              get_be16(pb); /* packet size = 0 */
  
              st->codec->sample_rate = ((get_be32(pb) >> 16));
  
+            //Read QT version 1 fields. In version 0 these do not exist.
+            dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom);
+            if(!c->isom) {
+                if(version==1) {
+                    sc->samples_per_frame = get_be32(pb);
+                    get_be32(pb); /* bytes per packet */
+                    sc->bytes_per_frame = get_be32(pb);
+                    get_be32(pb); /* bytes per sample */
+                } else if(version==2) {
+                    get_be32(pb); /* sizeof struct only */
+                    st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */
+                    st->codec->channels = get_be32(pb);
+                    get_be32(pb); /* always 0x7F000000 */
+                    st->codec->bits_per_coded_sample = get_be32(pb); /* bits per channel if sound is uncompressed */
+                    flags = get_be32(pb); /* lcpm format specific flag */
+                    sc->bytes_per_frame = get_be32(pb); /* bytes per audio packet if constant */
+                    sc->samples_per_frame = get_be32(pb); /* lpcm frames per audio packet if constant */
+                    if (format == MKTAG('l','p','c','m'))
+                        st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags);
+                }
+            }
+
              switch (st->codec->codec_id) {
              case CODEC_ID_PCM_S8:
              case CODEC_ID_PCM_U8:
-                if (st->codec->bits_per_sample == 16)
+                if (st->codec->bits_per_coded_sample == 16)
                      st->codec->codec_id = CODEC_ID_PCM_S16BE;
                  break;
              case CODEC_ID_PCM_S16LE:
              case CODEC_ID_PCM_S16BE:
-                if (st->codec->bits_per_sample == 8)
+                if (st->codec->bits_per_coded_sample == 8)
                      st->codec->codec_id = CODEC_ID_PCM_S8;
-                else if (st->codec->bits_per_sample == 24)
-                    st->codec->codec_id = CODEC_ID_PCM_S24BE;
+                else if (st->codec->bits_per_coded_sample == 24)
+                    st->codec->codec_id =
+                        st->codec->codec_id == CODEC_ID_PCM_S16BE ?
+                        CODEC_ID_PCM_S24BE : CODEC_ID_PCM_S24LE;
                  break;
              /* set values for old format before stsd version 1 appeared */
              case CODEC_ID_MACE3:
@@ -882,29 +947,9 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
                  break;
              }
  
-            //Read QT version 1 fields. In version 0 these do not exist.
-            dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom);
-            if(!c->isom) {
-                if(version==1) {
-                    sc->samples_per_frame = get_be32(pb);
-                    get_be32(pb); /* bytes per packet */
-                    sc->bytes_per_frame = get_be32(pb);
-                    get_be32(pb); /* bytes per sample */
-                } else if(version==2) {
-                    get_be32(pb); /* sizeof struct only */
-                    st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */
-                    st->codec->channels = get_be32(pb);
-                    get_be32(pb); /* always 0x7F000000 */
-                    get_be32(pb); /* bits per channel if sound is uncompressed */
-                    get_be32(pb); /* lcpm format specific flag */
-                    get_be32(pb); /* bytes per audio packet if constant */
-                    get_be32(pb); /* lpcm frames per audio packet if constant */
-                }
-            }
-
              bits_per_sample = av_get_bits_per_sample(st->codec->codec_id);
              if (bits_per_sample) {
-                st->codec->bits_per_sample = bits_per_sample;
+                st->codec->bits_per_coded_sample = bits_per_sample;
                  sc->sample_size = (bits_per_sample >> 3) * st->codec->channels;
              }
          } else if(st->codec->codec_type==CODEC_TYPE_SUBTITLE){
@@ -1125,7 +1170,7 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          int duration =get_be32(pb);
  
          if (duration < 0) {
-            av_log(c->fc, AV_LOG_ERROR, "negative ctts, ignoring\n");
+            av_log(c->fc, AV_LOG_WARNING, "negative ctts, ignoring\n");
              sc->ctts_count = 0;
              url_fskip(pb, 8 * (entries - i - 1));
              break;
@@ -1309,11 +1354,6 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
          st->codec->width= 0; /* let decoder init width/height */
          st->codec->height= 0;
          break;
-#ifdef CONFIG_VORBIS_DECODER
-    case CODEC_ID_VORBIS:
-#endif
-        st->codec->sample_rate= 0; /* let decoder init parameters properly */
-        break;
      }
  
      /* Do not need those anymore. */
@@ -1371,6 +1411,11 @@ static int mov_read_udta(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  
  static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  {
+    int i;
+    int width;
+    int height;
+    int64_t disp_transform[2];
+    int display_matrix[3][2];
      AVStream *st = c->fc->streams[c->fc->nb_streams-1];
      int version = get_byte(pb);
  
@@ -1402,12 +1447,36 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
      get_be16(pb); /* volume */
      get_be16(pb); /* reserved */
  
-    url_fskip(pb, 36); /* display matrix */
-
-    /* those are fixed-point */
-    get_be32(pb); /* track width */
-    get_be32(pb); /* track height */
+    //read in the display matrix (outlined in ISO 14496-12, Section 6.2.2)
+    // they're kept in fixed point format through all calculations
+    // ignore u,v,z b/c we don't need the scale factor to calc aspect ratio
+    for (i = 0; i < 3; i++) {
+        display_matrix[i][0] = get_be32(pb);   // 16.16 fixed point
+        display_matrix[i][1] = get_be32(pb);   // 16.16 fixed point
+        get_be32(pb);           // 2.30 fixed point (not used)
+    }
  
+    width = get_be32(pb);       // 16.16 fixed point track width
+    height = get_be32(pb);      // 16.16 fixed point track height
+
+    //transform the display width/height according to the matrix
+    // skip this if the display matrix is the default identity matrix
+    // to keep the same scale, use [width height 1<<16]
+    if (width && height &&
+        (display_matrix[0][0] != 65536 || display_matrix[0][1]           ||
+        display_matrix[1][0]           || display_matrix[1][1] != 65536  ||
+        display_matrix[2][0]           || display_matrix[2][1])) {
+        for (i = 0; i < 2; i++)
+            disp_transform[i] =
+                (int64_t)  width  * display_matrix[0][i] +
+                (int64_t)  height * display_matrix[1][i] +
+                ((int64_t) display_matrix[2][i] << 16);
+
+        //sample aspect ratio is new width/height divided by old width/height
+        st->sample_aspect_ratio = av_d2q(
+            ((double) disp_transform[0] * height) /
+            ((double) disp_transform[1] * width), INT_MAX);
+    }
      return 0;
  }
  
@@ -1634,6 +1703,7 @@ static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
  }
  
  static const MOVParseTableEntry mov_default_parse_table[] = {
+{ MKTAG('a','v','s','s'), mov_read_extradata },
  { MKTAG('c','o','6','4'), mov_read_stco },
  { MKTAG('c','t','t','s'), mov_read_ctts }, /* composition time to sample */
  { MKTAG('d','i','n','f'), mov_read_default },
@@ -1698,6 +1768,7 @@ static int mov_probe(AVProbeData *p)
          case MKTAG('m','d','a','t'):
          case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */
          case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */
+        case MKTAG('f','t','y','p'):
              return AVPROBE_SCORE_MAX;
          /* those are more common words, so rate then a bit less */
          case MKTAG('e','d','i','w'): /* xdcam files have reverted first tags */
@@ -1706,8 +1777,7 @@ static int mov_probe(AVProbeData *p)
          case MKTAG('j','u','n','k'):
          case MKTAG('p','i','c','t'):
              return AVPROBE_SCORE_MAX - 5;
-        case MKTAG(0x82,0x82,0x7f,0x7d ):
-        case MKTAG('f','t','y','p'):
+        case MKTAG(0x82,0x82,0x7f,0x7d):
          case MKTAG('s','k','i','p'):
          case MKTAG('u','u','i','d'):
          case MKTAG('p','r','f','l'):