X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavformat%2Fmov.c;h=e40abcaf8d4442d108dfa3f7a4b1a5995281e6f2;hb=8d1a5af700c7942d16b6643f357657b240ff8ab3;hp=774e57736ebe31eec9e284573b259cac8da637bb;hpb=9ce84dd8b4dca869651b7fa7a08ba08b1014b760;p=ffmpeg diff --git a/libavformat/mov.c b/libavformat/mov.c index 774e57736eb..e40abcaf8d4 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -1,6 +1,6 @@ /* * MOV demuxer - * Copyright (c) 2001 Fabrice Bellard. + * Copyright (c) 2001 Fabrice Bellard * * This file is part of FFmpeg. * @@ -23,12 +23,15 @@ //#define DEBUG +#include "libavutil/intreadwrite.h" #include "avformat.h" #include "riff.h" #include "isom.h" #include "dv.h" +#include "libavcodec/mpeg4audio.h" +#include "libavcodec/mpegaudiodata.h" -#ifdef CONFIG_ZLIB +#if CONFIG_ZLIB #include #endif @@ -67,18 +70,18 @@ typedef struct { int first; int count; int id; -} MOV_stsc_t; +} MOVStsc; typedef struct { uint32_t type; char *path; -} MOV_dref_t; +} MOVDref; typedef struct { uint32_t type; int64_t offset; int64_t size; /* total size (excluding the size and type fields) */ -} MOV_atom_t; +} MOVAtom; struct MOVParseTableEntry; @@ -107,14 +110,13 @@ typedef struct MOVStreamContext { unsigned int chunk_count; int64_t *chunk_offsets; unsigned int stts_count; - MOV_stts_t *stts_data; + MOVStts *stts_data; unsigned int ctts_count; - MOV_stts_t *ctts_data; - unsigned int edit_count; /* number of 'edit' (elst atom) */ - unsigned int sample_to_chunk_sz; - MOV_stsc_t *sample_to_chunk; - int sample_to_ctime_index; - int sample_to_ctime_sample; + MOVStts *ctts_data; + unsigned int stsc_count; + MOVStsc *stsc_data; + int ctts_index; + int ctts_sample; unsigned int sample_size; unsigned int sample_count; int *sample_sizes; @@ -122,15 +124,19 @@ typedef struct MOVStreamContext { int *keyframes; int time_scale; int time_rate; + int time_offset; ///< time offset of the first edit list entry int current_sample; unsigned int bytes_per_frame; unsigned int samples_per_frame; int dv_audio_container; - int pseudo_stream_id; + int pseudo_stream_id; ///< -1 means demux all ids int16_t audio_cid; ///< stsd audio compression id unsigned drefs_count; - MOV_dref_t *drefs; + MOVDref *drefs; int dref_id; + int wrong_dts; ///< dts are wrong due to negative ctts + int width; ///< tkhd width + int height; ///< tkhd height } MOVStreamContext; typedef struct MOVContext { @@ -146,6 +152,7 @@ typedef struct MOVContext { MOVFragment fragment; ///< current fragment in moof atom MOVTrackExt *trex_data; unsigned trex_count; + int itunes_metadata; ///< metadata are itunes style } MOVContext; @@ -153,22 +160,21 @@ typedef struct MOVContext { /* those functions parse an atom */ /* return code: - 1: found what I wanted, exit 0: continue to parse next atom <0: error occurred, exit */ /* links atom IDs to parse functions */ typedef struct MOVParseTableEntry { uint32_t type; - int (*parse)(MOVContext *ctx, ByteIOContext *pb, MOV_atom_t atom); + int (*parse)(MOVContext *ctx, ByteIOContext *pb, MOVAtom atom); } MOVParseTableEntry; static const MOVParseTableEntry mov_default_parse_table[]; -static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { int64_t total_size = 0; - MOV_atom_t a; + MOVAtom a; int i; int err = 0; @@ -209,7 +215,7 @@ static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) if (mov_default_parse_table[i].type == 0) { /* skip leaf atoms data */ url_fskip(pb, a.size); } else { - offset_t start_pos = url_ftell(pb); + int64_t start_pos = url_ftell(pb); int64_t left; err = mov_default_parse_table[i].parse(c, pb, a); if (url_is_streamed(pb) && c->found_moov && c->found_mdat) @@ -229,7 +235,7 @@ static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return err; } -static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -243,9 +249,9 @@ static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) sc->drefs = av_mallocz(entries * sizeof(*sc->drefs)); for (i = 0; i < sc->drefs_count; i++) { - MOV_dref_t *dref = &sc->drefs[i]; + MOVDref *dref = &sc->drefs[i]; uint32_t size = get_be32(pb); - offset_t next = url_ftell(pb) + size - 4; + int64_t next = url_ftell(pb) + size - 4; dref->type = get_le32(pb); get_be32(pb); // version + flags @@ -297,7 +303,7 @@ static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_hdlr(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_hdlr(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; uint32_t type; @@ -316,13 +322,13 @@ static int mov_read_hdlr(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) *((char *)&type), ((char *)&type)[1], ((char *)&type)[2], ((char *)&type)[3]); if(!ctype) c->isom = 1; - if(type == MKTAG('v', 'i', 'd', 'e')) + if (type == MKTAG('v','i','d','e')) st->codec->codec_type = CODEC_TYPE_VIDEO; - else if(type == MKTAG('s', 'o', 'u', 'n')) + else if(type == MKTAG('s','o','u','n')) st->codec->codec_type = CODEC_TYPE_AUDIO; - else if(type == MKTAG('m', '1', 'a', ' ')) + else if(type == MKTAG('m','1','a',' ')) st->codec->codec_id = CODEC_ID_MP2; - else if(type == MKTAG('s', 'u', 'b', 'p')) { + else if(type == MKTAG('s','u','b','p')) { st->codec->codec_type = CODEC_TYPE_SUBTITLE; } get_be32(pb); /* component manufacture */ @@ -362,7 +368,15 @@ static int mp4_read_descr(MOVContext *c, ByteIOContext *pb, int *tag) #define MP4DecConfigDescrTag 0x04 #define MP4DecSpecificDescrTag 0x05 -static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static const AVCodecTag mp4_audio_types[] = { + { CODEC_ID_MP3ON4, 29 }, /* old mp3on4 draft */ + { CODEC_ID_MP3ON4, 32 }, /* layer 1 */ + { CODEC_ID_MP3ON4, 33 }, /* layer 2 */ + { CODEC_ID_MP3ON4, 34 }, /* layer 3 */ + { CODEC_ID_NONE, 0 }, +}; + +static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; int tag, len; @@ -395,17 +409,49 @@ static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return AVERROR(ENOMEM); get_buffer(pb, st->codec->extradata, len); st->codec->extradata_size = len; - /* from mplayer */ - if ((*st->codec->extradata >> 3) == 29) { - st->codec->codec_id = CODEC_ID_MP3ON4; + if (st->codec->codec_id == CODEC_ID_AAC) { + MPEG4AudioConfig cfg; + ff_mpeg4audio_get_config(&cfg, st->codec->extradata, + st->codec->extradata_size); + if (cfg.chan_config > 7) + return -1; + st->codec->channels = ff_mpeg4audio_channels[cfg.chan_config]; + if (cfg.object_type == 29 && cfg.sampling_index < 3) // old mp3on4 + st->codec->sample_rate = ff_mpa_freq_tab[cfg.sampling_index]; + else + st->codec->sample_rate = cfg.sample_rate; // ext sample rate ? + dprintf(c->fc, "mp4a config channels %d obj %d ext obj %d " + "sample rate %d ext sample rate %d\n", st->codec->channels, + cfg.object_type, cfg.ext_object_type, + cfg.sample_rate, cfg.ext_sample_rate); + if (!(st->codec->codec_id = codec_get_id(mp4_audio_types, + cfg.object_type))) + st->codec->codec_id = CODEC_ID_AAC; } } } return 0; } +static int mov_read_pasp(MOVContext *c, ByteIOContext *pb, MOVAtom atom) +{ + const int num = get_be32(pb); + const int den = get_be32(pb); + AVStream * const st = c->fc->streams[c->fc->nb_streams-1]; + if (den != 0) { + if ((st->sample_aspect_ratio.den != 1 || st->sample_aspect_ratio.num) && // default + (den != st->sample_aspect_ratio.den || num != st->sample_aspect_ratio.num)) + av_log(c->fc, AV_LOG_WARNING, + "sample aspect ratio already set to %d:%d, overriding by 'pasp' atom\n", + st->sample_aspect_ratio.num, st->sample_aspect_ratio.den); + st->sample_aspect_ratio.num = num; + st->sample_aspect_ratio.den = den; + } + return 0; +} + /* this atom contains actual media data */ -static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { if(atom.size == 0) /* wrong one (MP4) */ return 0; @@ -413,7 +459,7 @@ static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; /* now go for moov */ } -static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { uint32_t type = get_le32(pb); @@ -426,7 +472,7 @@ static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } /* this atom should contain all header atoms */ -static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { if (mov_read_default(c, pb, atom) < 0) return -1; @@ -436,22 +482,22 @@ static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; /* now go for mdat */ } -static int mov_read_moof(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_moof(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { c->fragment.moof_offset = url_ftell(pb) - 8; dprintf(c->fc, "moof offset %llx\n", c->fragment.moof_offset); return mov_read_default(c, pb, atom); } -static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; int version = get_byte(pb); - int lang; + unsigned lang; if (version > 1) - return 1; /* unsupported */ + return -1; /* unsupported */ get_be24(pb); /* flags */ if (version == 1) { @@ -472,7 +518,7 @@ static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { int version = get_byte(pb); /* version */ get_be24(pb); /* flags */ @@ -508,7 +554,7 @@ static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; @@ -528,12 +574,13 @@ static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; int little_endian = get_be16(pb); - if (little_endian) { + dprintf(c->fc, "enda %d\n", little_endian); + if (little_endian == 1) { switch (st->codec->codec_id) { case CODEC_ID_PCM_S24BE: st->codec->codec_id = CODEC_ID_PCM_S24LE; @@ -541,6 +588,12 @@ static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) case CODEC_ID_PCM_S32BE: st->codec->codec_id = CODEC_ID_PCM_S32LE; break; + case CODEC_ID_PCM_F32BE: + st->codec->codec_id = CODEC_ID_PCM_F32LE; + break; + case CODEC_ID_PCM_F64BE: + st->codec->codec_id = CODEC_ID_PCM_F64LE; + break; default: break; } @@ -549,11 +602,16 @@ static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } /* FIXME modify qdm2/svq3/h264 decoders to take full atom as extradata */ -static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { - AVStream *st = c->fc->streams[c->fc->nb_streams-1]; - uint64_t size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE; + AVStream *st; + uint64_t size; uint8_t *buf; + + if (c->fc->nb_streams < 1) // will happen with jp2 files + return 0; + st= c->fc->streams[c->fc->nb_streams-1]; + size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE; if(size > INT_MAX || (uint64_t)atom.size > INT_MAX) return -1; buf= av_realloc(st->codec->extradata, size); @@ -568,7 +626,7 @@ static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; @@ -595,7 +653,7 @@ static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) * This function reads atom content and puts data in extradata without tag * nor size unlike mov_read_extradata. */ -static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; @@ -611,7 +669,7 @@ static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -629,39 +687,58 @@ static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) sc->chunk_offsets = av_malloc(entries * sizeof(int64_t)); if (!sc->chunk_offsets) return -1; - if (atom.type == MKTAG('s', 't', 'c', 'o')) { - for(i=0; ichunk_offsets[i] = get_be32(pb); - } - } else if (atom.type == MKTAG('c', 'o', '6', '4')) { - for(i=0; ichunk_offsets[i] = get_be64(pb); - } - } else + else return -1; return 0; } -static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +/** + * Compute codec id for 'lpcm' tag. + * See CoreAudioTypes and AudioStreamBasicDescription at Apple. + */ +static enum CodecID mov_get_lpcm_codec_id(int bps, int flags) +{ + if (flags & 1) { // floating point + if (flags & 2) { // big endian + if (bps == 32) return CODEC_ID_PCM_F32BE; + else if (bps == 64) return CODEC_ID_PCM_F64BE; + } else { + if (bps == 32) return CODEC_ID_PCM_F32LE; + else if (bps == 64) return CODEC_ID_PCM_F64LE; + } + } else { + if (flags & 2) { + if (bps == 8) + // signed integer + if (flags & 4) return CODEC_ID_PCM_S8; + else return CODEC_ID_PCM_U8; + else if (bps == 16) return CODEC_ID_PCM_S16BE; + else if (bps == 24) return CODEC_ID_PCM_S24BE; + else if (bps == 32) return CODEC_ID_PCM_S32BE; + } else { + if (bps == 8) + if (flags & 4) return CODEC_ID_PCM_S8; + else return CODEC_ID_PCM_U8; + else if (bps == 16) return CODEC_ID_PCM_S16LE; + else if (bps == 24) return CODEC_ID_PCM_S24LE; + else if (bps == 32) return CODEC_ID_PCM_S32LE; + } + } + return CODEC_ID_NONE; +} + +static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; - int entries, frames_per_sample; - uint32_t format; - uint8_t codec_name[32]; - - /* for palette traversal */ - unsigned int color_depth; - unsigned int color_start; - unsigned int color_count; - unsigned int color_end; - int color_index; - int color_dec; - int color_greyscale; - const uint8_t *color_table; - int j, pseudo_stream_id; - unsigned char r, g, b; + int j, entries, pseudo_stream_id; get_byte(pb); /* version */ get_be24(pb); /* flags */ @@ -672,37 +749,39 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) //Parsing Sample description table enum CodecID id; int dref_id; - MOV_atom_t a = { 0, 0, 0 }; - offset_t start_pos = url_ftell(pb); + MOVAtom a = { 0, 0, 0 }; + int64_t start_pos = url_ftell(pb); int size = get_be32(pb); /* size */ - format = get_le32(pb); /* data format */ + uint32_t format = get_le32(pb); /* data format */ get_be32(pb); /* reserved */ get_be16(pb); /* reserved */ dref_id = get_be16(pb); if (st->codec->codec_tag && + st->codec->codec_tag != format && (c->fc->video_codec_id ? codec_get_id(codec_movvideo_tags, format) != c->fc->video_codec_id - : st->codec->codec_tag != MKTAG('j', 'p', 'e', 'g')) + : st->codec->codec_tag != MKTAG('j','p','e','g')) ){ /* Multiple fourcc, we skip JPEG. This is not correct, we should * export it as a separate AVStream but this needs a few changes * in the MOV demuxer, patch welcome. */ + av_log(c->fc, AV_LOG_WARNING, "multiple fourcc not supported\n"); url_fskip(pb, size - (url_ftell(pb) - start_pos)); continue; } - sc->pseudo_stream_id= pseudo_stream_id; + sc->pseudo_stream_id = st->codec->codec_tag ? -1 : pseudo_stream_id; sc->dref_id= dref_id; st->codec->codec_tag = format; id = codec_get_id(codec_movaudio_tags, format); - if (id<=0 && (format&0xFFFF) == 'm' + ('s'<<8)) + if (id<=0 && (format&0xFFFF) == 'm'+('s'<<8)) id = codec_get_id(codec_wav_tags, bswap_32(format)&0xFFFF); if (st->codec->codec_type != CODEC_TYPE_VIDEO && id > 0) { st->codec->codec_type = CODEC_TYPE_AUDIO; } else if (st->codec->codec_type != CODEC_TYPE_AUDIO && /* do not overwrite codec type */ - format && format != MKTAG('m', 'p', '4', 's')) { /* skip old asf mpeg4 tag */ + format && format != MKTAG('m','p','4','s')) { /* skip old asf mpeg4 tag */ id = codec_get_id(codec_movvideo_tags, format); if (id <= 0) id = codec_get_id(codec_bmp_tags, format); @@ -720,6 +799,10 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) (format >> 24) & 0xff, st->codec->codec_type); if(st->codec->codec_type==CODEC_TYPE_VIDEO) { + uint8_t codec_name[32]; + unsigned int color_depth; + int color_greyscale; + st->codec->codec_id = id; get_be16(pb); /* version */ get_be16(pb); /* revision level */ @@ -733,30 +816,33 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) get_be32(pb); /* horiz resolution */ get_be32(pb); /* vert resolution */ get_be32(pb); /* data size, always 0 */ - frames_per_sample = get_be16(pb); /* frames per samples */ - - dprintf(c->fc, "frames/samples = %d\n", frames_per_sample); + get_be16(pb); /* frames per samples */ - get_buffer(pb, codec_name, 32); /* codec name, pascal string (FIXME: true for mp4?) */ + get_buffer(pb, codec_name, 32); /* codec name, pascal string */ if (codec_name[0] <= 31) { memcpy(st->codec->codec_name, &codec_name[1],codec_name[0]); st->codec->codec_name[codec_name[0]] = 0; } - st->codec->bits_per_sample = get_be16(pb); /* depth */ + st->codec->bits_per_coded_sample = get_be16(pb); /* depth */ st->codec->color_table_id = get_be16(pb); /* colortable id */ dprintf(c->fc, "depth %d, ctab id %d\n", - st->codec->bits_per_sample, st->codec->color_table_id); + st->codec->bits_per_coded_sample, st->codec->color_table_id); /* figure out the palette situation */ - color_depth = st->codec->bits_per_sample & 0x1F; - color_greyscale = st->codec->bits_per_sample & 0x20; + color_depth = st->codec->bits_per_coded_sample & 0x1F; + color_greyscale = st->codec->bits_per_coded_sample & 0x20; /* if the depth is 2, 4, or 8 bpp, file is palettized */ if ((color_depth == 2) || (color_depth == 4) || (color_depth == 8)) { + /* for palette traversal */ + unsigned int color_start, color_count, color_end; + unsigned char r, g, b; + if (color_greyscale) { + int color_index, color_dec; /* compute the greyscale palette */ - st->codec->bits_per_sample = color_depth; + st->codec->bits_per_coded_sample = color_depth; color_count = 1 << color_depth; color_index = 255; color_dec = 256 / (color_count - 1); @@ -769,6 +855,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) color_index = 0; } } else if (st->codec->color_table_id) { + const uint8_t *color_table; /* if flag bit 3 is set, use the default palette */ color_count = 1 << color_depth; if (color_depth == 2) @@ -814,7 +901,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } else st->codec->palctrl = NULL; } else if(st->codec->codec_type==CODEC_TYPE_AUDIO) { - int bits_per_sample; + int bits_per_sample, flags; uint16_t version = get_be16(pb); st->codec->codec_id = id; @@ -823,25 +910,49 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) st->codec->channels = get_be16(pb); /* channel count */ dprintf(c->fc, "audio channels %d\n", st->codec->channels); - st->codec->bits_per_sample = get_be16(pb); /* sample size */ + st->codec->bits_per_coded_sample = get_be16(pb); /* sample size */ sc->audio_cid = get_be16(pb); get_be16(pb); /* packet size = 0 */ st->codec->sample_rate = ((get_be32(pb) >> 16)); + //Read QT version 1 fields. In version 0 these do not exist. + dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom); + if(!c->isom) { + if(version==1) { + sc->samples_per_frame = get_be32(pb); + get_be32(pb); /* bytes per packet */ + sc->bytes_per_frame = get_be32(pb); + get_be32(pb); /* bytes per sample */ + } else if(version==2) { + get_be32(pb); /* sizeof struct only */ + st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */ + st->codec->channels = get_be32(pb); + get_be32(pb); /* always 0x7F000000 */ + st->codec->bits_per_coded_sample = get_be32(pb); /* bits per channel if sound is uncompressed */ + flags = get_be32(pb); /* lcpm format specific flag */ + sc->bytes_per_frame = get_be32(pb); /* bytes per audio packet if constant */ + sc->samples_per_frame = get_be32(pb); /* lpcm frames per audio packet if constant */ + if (format == MKTAG('l','p','c','m')) + st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags); + } + } + switch (st->codec->codec_id) { case CODEC_ID_PCM_S8: case CODEC_ID_PCM_U8: - if (st->codec->bits_per_sample == 16) + if (st->codec->bits_per_coded_sample == 16) st->codec->codec_id = CODEC_ID_PCM_S16BE; break; case CODEC_ID_PCM_S16LE: case CODEC_ID_PCM_S16BE: - if (st->codec->bits_per_sample == 8) + if (st->codec->bits_per_coded_sample == 8) st->codec->codec_id = CODEC_ID_PCM_S8; - else if (st->codec->bits_per_sample == 24) - st->codec->codec_id = CODEC_ID_PCM_S24BE; + else if (st->codec->bits_per_coded_sample == 24) + st->codec->codec_id = + st->codec->codec_id == CODEC_ID_PCM_S16BE ? + CODEC_ID_PCM_S24BE : CODEC_ID_PCM_S24LE; break; /* set values for old format before stsd version 1 appeared */ case CODEC_ID_MACE3: @@ -856,37 +967,27 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) sc->samples_per_frame = 64; sc->bytes_per_frame = 34*st->codec->channels; break; + case CODEC_ID_GSM: + sc->samples_per_frame = 160; + sc->bytes_per_frame = 33; + break; default: break; } - //Read QT version 1 fields. In version 0 these do not exist. - dprintf(c->fc, "version =%d, isom =%d\n",version,c->isom); - if(!c->isom) { - if(version==1) { - sc->samples_per_frame = get_be32(pb); - get_be32(pb); /* bytes per packet */ - sc->bytes_per_frame = get_be32(pb); - get_be32(pb); /* bytes per sample */ - } else if(version==2) { - get_be32(pb); /* sizeof struct only */ - st->codec->sample_rate = av_int2dbl(get_be64(pb)); /* float 64 */ - st->codec->channels = get_be32(pb); - get_be32(pb); /* always 0x7F000000 */ - get_be32(pb); /* bits per channel if sound is uncompressed */ - get_be32(pb); /* lcpm format specific flag */ - get_be32(pb); /* bytes per audio packet if constant */ - get_be32(pb); /* lpcm frames per audio packet if constant */ - } - } - bits_per_sample = av_get_bits_per_sample(st->codec->codec_id); if (bits_per_sample) { - st->codec->bits_per_sample = bits_per_sample; + st->codec->bits_per_coded_sample = bits_per_sample; sc->sample_size = (bits_per_sample >> 3) * st->codec->channels; } } else if(st->codec->codec_type==CODEC_TYPE_SUBTITLE){ + // ttxt stsd contains display flags, justification, background + // color, fonts, and default styles, so fake an atom to read it + MOVAtom fake_atom = { .size = size - (url_ftell(pb) - start_pos) }; + mov_read_glbl(c, pb, fake_atom); st->codec->codec_id= id; + st->codec->width = sc->width; + st->codec->height = sc->height; } else { /* other codec type, just skip (rtp, mp4s, tmcd ...) */ url_fskip(pb, size - (url_ftell(pb) - start_pos)); @@ -905,9 +1006,9 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) /* special codec parameters handling */ switch (st->codec->codec_id) { -#ifdef CONFIG_DV_DEMUXER +#if CONFIG_DV_DEMUXER case CODEC_ID_DVAUDIO: - c->dv_fctx = av_alloc_format_context(); + c->dv_fctx = avformat_alloc_context(); c->dv_demux = dv_init_demux(c->dv_fctx); if (!c->dv_demux) { av_log(c->fc, AV_LOG_ERROR, "dv demux context init error\n"); @@ -918,23 +1019,36 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) break; #endif /* no ifdef since parameters are always those */ - case CODEC_ID_AMR_WB: - st->codec->sample_rate= 16000; + case CODEC_ID_QCELP: + st->codec->frame_size= 160; st->codec->channels= 1; /* really needed */ break; case CODEC_ID_AMR_NB: - st->codec->sample_rate= 8000; + case CODEC_ID_AMR_WB: + st->codec->frame_size= sc->samples_per_frame; st->codec->channels= 1; /* really needed */ + /* force sample rate for amr, stsd in 3gp does not store sample rate */ + if (st->codec->codec_id == CODEC_ID_AMR_NB) + st->codec->sample_rate = 8000; + else if (st->codec->codec_id == CODEC_ID_AMR_WB) + st->codec->sample_rate = 16000; break; case CODEC_ID_MP2: case CODEC_ID_MP3: st->codec->codec_type = CODEC_TYPE_AUDIO; /* force type after stsd for m1a hdlr */ st->need_parsing = AVSTREAM_PARSE_FULL; break; + case CODEC_ID_GSM: case CODEC_ID_ADPCM_MS: case CODEC_ID_ADPCM_IMA_WAV: st->codec->block_align = sc->bytes_per_frame; break; + case CODEC_ID_ALAC: + if (st->codec->extradata_size == 36) { + st->codec->frame_size = AV_RB32(st->codec->extradata+12); + st->codec->channels = AV_RB8 (st->codec->extradata+21); + } + break; default: break; } @@ -942,7 +1056,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -953,24 +1067,24 @@ static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) entries = get_be32(pb); - if(entries >= UINT_MAX / sizeof(MOV_stsc_t)) + if(entries >= UINT_MAX / sizeof(*sc->stsc_data)) return -1; dprintf(c->fc, "track[%i].stsc.entries = %i\n", c->fc->nb_streams-1, entries); - sc->sample_to_chunk_sz = entries; - sc->sample_to_chunk = av_malloc(entries * sizeof(MOV_stsc_t)); - if (!sc->sample_to_chunk) + sc->stsc_count = entries; + sc->stsc_data = av_malloc(entries * sizeof(*sc->stsc_data)); + if (!sc->stsc_data) return -1; for(i=0; isample_to_chunk[i].first = get_be32(pb); - sc->sample_to_chunk[i].count = get_be32(pb); - sc->sample_to_chunk[i].id = get_be32(pb); + sc->stsc_data[i].first = get_be32(pb); + sc->stsc_data[i].count = get_be32(pb); + sc->stsc_data[i].id = get_be32(pb); } return 0; } -static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -998,7 +1112,7 @@ static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -1028,7 +1142,7 @@ static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -1039,17 +1153,15 @@ static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) get_byte(pb); /* version */ get_be24(pb); /* flags */ entries = get_be32(pb); - if(entries >= UINT_MAX / sizeof(MOV_stts_t)) + if(entries >= UINT_MAX / sizeof(*sc->stts_data)) return -1; sc->stts_count = entries; - sc->stts_data = av_malloc(entries * sizeof(MOV_stts_t)); + sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data)); if (!sc->stts_data) return -1; dprintf(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries); - sc->time_rate=0; - for(i=0; istts_data[i].count= sample_count; sc->stts_data[i].duration= sample_duration; - sc->time_rate= ff_gcd(sc->time_rate, sample_duration); + sc->time_rate= av_gcd(sc->time_rate, sample_duration); dprintf(c->fc, "sample_count=%d, sample_duration=%d\n",sample_count,sample_duration); @@ -1073,7 +1185,7 @@ static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st = c->fc->streams[c->fc->nb_streams-1]; MOVStreamContext *sc = st->priv_data; @@ -1082,11 +1194,11 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) get_byte(pb); /* version */ get_be24(pb); /* flags */ entries = get_be32(pb); - if(entries >= UINT_MAX / sizeof(MOV_stts_t)) + if(entries >= UINT_MAX / sizeof(*sc->ctts_data)) return -1; sc->ctts_count = entries; - sc->ctts_data = av_malloc(entries * sizeof(MOV_stts_t)); + sc->ctts_data = av_malloc(entries * sizeof(*sc->ctts_data)); if (!sc->ctts_data) return -1; dprintf(c->fc, "track[%i].ctts.entries = %i\n", c->fc->nb_streams-1, entries); @@ -1096,15 +1208,13 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) int duration =get_be32(pb); if (duration < 0) { - av_log(c->fc, AV_LOG_ERROR, "negative ctts, ignoring\n"); - sc->ctts_count = 0; - url_fskip(pb, 8 * (entries - i - 1)); - break; + sc->wrong_dts = 1; + st->codec->has_b_frames = 1; } sc->ctts_data[i].count = count; sc->ctts_data[i].duration= duration; - sc->time_rate= ff_gcd(sc->time_rate, duration); + sc->time_rate= av_gcd(sc->time_rate, FFABS(duration)); } return 0; } @@ -1112,13 +1222,19 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) static void mov_build_index(MOVContext *mov, AVStream *st) { MOVStreamContext *sc = st->priv_data; - offset_t current_offset; + int64_t current_offset; int64_t current_dts = 0; unsigned int stts_index = 0; unsigned int stsc_index = 0; unsigned int stss_index = 0; unsigned int i, j; + /* adjust first dts according to edit list */ + if (sc->time_offset) { + assert(sc->time_offset % sc->time_rate == 0); + current_dts = - (sc->time_offset / sc->time_rate); + } + /* only use old uncompressed audio chunk demuxing when stts specifies it */ if (!(st->codec->codec_type == CODEC_TYPE_AUDIO && sc->stts_count == 1 && sc->stts_data[0].duration == 1)) { @@ -1131,10 +1247,10 @@ static void mov_build_index(MOVContext *mov, AVStream *st) st->nb_frames = sc->sample_count; for (i = 0; i < sc->chunk_count; i++) { current_offset = sc->chunk_offsets[i]; - if (stsc_index + 1 < sc->sample_to_chunk_sz && - i + 1 == sc->sample_to_chunk[stsc_index + 1].first) + if (stsc_index + 1 < sc->stsc_count && + i + 1 == sc->stsc_data[stsc_index + 1].first) stsc_index++; - for (j = 0; j < sc->sample_to_chunk[stsc_index].count; j++) { + for (j = 0; j < sc->stsc_data[stsc_index].count; j++) { if (current_sample >= sc->sample_count) { av_log(mov->fc, AV_LOG_ERROR, "wrong sample count\n"); goto out; @@ -1146,12 +1262,14 @@ static void mov_build_index(MOVContext *mov, AVStream *st) stss_index++; } sample_size = sc->sample_size > 0 ? sc->sample_size : sc->sample_sizes[current_sample]; - dprintf(mov->fc, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", " - "size %d, distance %d, keyframe %d\n", st->index, current_sample, - current_offset, current_dts, sample_size, distance, keyframe); - if(sc->sample_to_chunk[stsc_index].id - 1 == sc->pseudo_stream_id) + if(sc->pseudo_stream_id == -1 || + sc->stsc_data[stsc_index].id - 1 == sc->pseudo_stream_id) { av_add_index_entry(st, current_offset, current_dts, sample_size, distance, keyframe ? AVINDEX_KEYFRAME : 0); + dprintf(mov->fc, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", " + "size %d, distance %d, keyframe %d\n", st->index, current_sample, + current_offset, current_dts, sample_size, distance, keyframe); + } current_offset += sample_size; assert(sc->stts_data[stts_index].duration % sc->time_rate == 0); current_dts += sc->stts_data[stts_index].duration / sc->time_rate; @@ -1169,14 +1287,14 @@ static void mov_build_index(MOVContext *mov, AVStream *st) unsigned int frames = 1; for (i = 0; i < sc->chunk_count; i++) { current_offset = sc->chunk_offsets[i]; - if (stsc_index + 1 < sc->sample_to_chunk_sz && - i + 1 == sc->sample_to_chunk[stsc_index + 1].first) + if (stsc_index + 1 < sc->stsc_count && + i + 1 == sc->stsc_data[stsc_index + 1].first) stsc_index++; - chunk_samples = sc->sample_to_chunk[stsc_index].count; + chunk_samples = sc->stsc_data[stsc_index].count; /* get chunk size, beware of alaw/ulaw/mace */ if (sc->samples_per_frame > 0 && (chunk_samples * sc->bytes_per_frame % sc->samples_per_frame == 0)) { - if (sc->samples_per_frame < 1024) + if (sc->samples_per_frame < 160) chunk_size = chunk_samples * sc->bytes_per_frame / sc->samples_per_frame; else { chunk_size = sc->bytes_per_frame; @@ -1215,7 +1333,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st) sc->sample_count = st->nb_index_entries; } -static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { AVStream *st; MOVStreamContext *sc; @@ -1228,14 +1346,14 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) st->priv_data = sc; st->codec->codec_type = CODEC_TYPE_DATA; - st->start_time = 0; /* XXX: check */ + sc->ffindex = st->index; if ((ret = mov_read_default(c, pb, atom)) < 0) return ret; /* sanity checks */ - if(!sc->stts_count || !sc->chunk_count || !sc->sample_to_chunk_sz || - (!sc->sample_size && !sc->sample_count)){ + if(sc->chunk_count && (!sc->stts_count || !sc->stsc_count || + (!sc->sample_size && !sc->sample_count))){ av_log(c->fc, AV_LOG_ERROR, "stream %d, missing mandatory atoms, broken header\n", st->index); sc->sample_count = 0; //ignore track @@ -1247,50 +1365,45 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) sc->time_scale= c->time_scale; av_set_pts_info(st, 64, sc->time_rate, sc->time_scale); - if (st->codec->codec_type == CODEC_TYPE_AUDIO && sc->stts_count == 1) - st->codec->frame_size = av_rescale(sc->time_rate, st->codec->sample_rate, sc->time_scale); + if (st->codec->codec_type == CODEC_TYPE_AUDIO && + !st->codec->frame_size && sc->stts_count == 1) { + st->codec->frame_size = av_rescale(sc->stts_data[0].duration, + st->codec->sample_rate, sc->time_scale); + dprintf(c->fc, "frame size %d\n", st->codec->frame_size); + } if(st->duration != AV_NOPTS_VALUE){ assert(st->duration % sc->time_rate == 0); st->duration /= sc->time_rate; } - sc->ffindex = st->index; + mov_build_index(c, st); if (sc->dref_id-1 < sc->drefs_count && sc->drefs[sc->dref_id-1].path) { if (url_fopen(&sc->pb, sc->drefs[sc->dref_id-1].path, URL_RDONLY) < 0) - av_log(c->fc, AV_LOG_ERROR, "stream %d, error opening external essence: %s\n", - st->index, strerror(errno)); + av_log(c->fc, AV_LOG_ERROR, "stream %d, error opening file %s: %s\n", + st->index, sc->drefs[sc->dref_id-1].path, strerror(errno)); } else sc->pb = c->fc->pb; switch (st->codec->codec_id) { -#ifdef CONFIG_H261_DECODER +#if CONFIG_H261_DECODER case CODEC_ID_H261: #endif -#ifdef CONFIG_H263_DECODER +#if CONFIG_H263_DECODER case CODEC_ID_H263: #endif -#ifdef CONFIG_MPEG4_DECODER +#if CONFIG_MPEG4_DECODER case CODEC_ID_MPEG4: #endif st->codec->width= 0; /* let decoder init width/height */ st->codec->height= 0; break; -#ifdef CONFIG_LIBFAAD - case CODEC_ID_AAC: -#endif -#ifdef CONFIG_VORBIS_DECODER - case CODEC_ID_VORBIS: -#endif - case CODEC_ID_MP3ON4: - st->codec->sample_rate= 0; /* let decoder init parameters properly */ - break; } /* Do not need those anymore. */ av_freep(&sc->chunk_offsets); - av_freep(&sc->sample_to_chunk); + av_freep(&sc->stsc_data); av_freep(&sc->sample_sizes); av_freep(&sc->keyframes); av_freep(&sc->stts_data); @@ -1298,52 +1411,84 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static void mov_parse_udta_string(ByteIOContext *pb, char *str, int size) +static int mov_read_ilst(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { - uint16_t str_size = get_be16(pb); /* string length */; - - get_be16(pb); /* skip language */ - get_buffer(pb, str, FFMIN(size, str_size)); + int ret; + c->itunes_metadata = 1; + ret = mov_read_default(c, pb, atom); + c->itunes_metadata = 0; + return ret; } -static int mov_read_udta(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_meta(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { - uint64_t end = url_ftell(pb) + atom.size; - - while (url_ftell(pb) + 8 < end) { - uint32_t tag_size = get_be32(pb); - uint32_t tag = get_le32(pb); - uint64_t next = url_ftell(pb) + tag_size - 8; - - if (next > end) // stop if tag_size is wrong - break; + url_fskip(pb, 4); // version + flags + atom.size -= 4; + return mov_read_default(c, pb, atom); +} - switch (tag) { - case MKTAG(0xa9,'n','a','m'): - mov_parse_udta_string(pb, c->fc->title, sizeof(c->fc->title)); - break; - case MKTAG(0xa9,'w','r','t'): - mov_parse_udta_string(pb, c->fc->author, sizeof(c->fc->author)); - break; - case MKTAG(0xa9,'c','p','y'): - mov_parse_udta_string(pb, c->fc->copyright, sizeof(c->fc->copyright)); - break; - case MKTAG(0xa9,'i','n','f'): - mov_parse_udta_string(pb, c->fc->comment, sizeof(c->fc->comment)); - break; - default: - break; - } +static int mov_read_trkn(MOVContext *c, ByteIOContext *pb, MOVAtom atom) +{ + get_be32(pb); // type + get_be32(pb); // unknown + c->fc->track = get_be32(pb); + dprintf(c->fc, "%.4s %d\n", (char*)&atom.type, c->fc->track); + return 0; +} - url_fseek(pb, next, SEEK_SET); +static int mov_read_udta_string(MOVContext *c, ByteIOContext *pb, MOVAtom atom) +{ + char *str = NULL; + int size; + uint16_t str_size; + + if (c->itunes_metadata) { + int data_size = get_be32(pb); + int tag = get_le32(pb); + if (tag == MKTAG('d','a','t','a')) { + get_be32(pb); // type + get_be32(pb); // unknown + str_size = data_size - 16; + atom.size -= 16; + } else return 0; + } else { + str_size = get_be16(pb); // string length + get_be16(pb); // language + atom.size -= 4; } + switch (atom.type) { + case MKTAG(0xa9,'n','a','m'): + str = c->fc->title; size = sizeof(c->fc->title); break; + case MKTAG(0xa9,'A','R','T'): + case MKTAG(0xa9,'w','r','t'): + str = c->fc->author; size = sizeof(c->fc->author); break; + case MKTAG(0xa9,'c','p','y'): + str = c->fc->copyright; size = sizeof(c->fc->copyright); break; + case MKTAG(0xa9,'c','m','t'): + case MKTAG(0xa9,'i','n','f'): + str = c->fc->comment; size = sizeof(c->fc->comment); break; + case MKTAG(0xa9,'a','l','b'): + str = c->fc->album; size = sizeof(c->fc->album); break; + } + if (!str) + return 0; + if (atom.size < 0) + return -1; + get_buffer(pb, str, FFMIN3(size, str_size, atom.size)); + dprintf(c->fc, "%.4s %s %d %lld\n", (char*)&atom.type, str, str_size, atom.size); return 0; } -static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { + int i; + int width; + int height; + int64_t disp_transform[2]; + int display_matrix[3][2]; AVStream *st = c->fc->streams[c->fc->nb_streams-1]; + MOVStreamContext *sc = st->priv_data; int version = get_byte(pb); get_be24(pb); /* flags */ @@ -1363,7 +1508,7 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } st->id = (int)get_be32(pb); /* track id (NOT 0 !)*/ get_be32(pb); /* reserved */ - st->start_time = 0; /* check */ + /* highlevel (considering edits) duration in movie timebase */ (version == 1) ? get_be64(pb) : get_be32(pb); get_be32(pb); /* reserved */ @@ -1374,16 +1519,42 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) get_be16(pb); /* volume */ get_be16(pb); /* reserved */ - url_fskip(pb, 36); /* display matrix */ - - /* those are fixed-point */ - get_be32(pb); /* track width */ - get_be32(pb); /* track height */ + //read in the display matrix (outlined in ISO 14496-12, Section 6.2.2) + // they're kept in fixed point format through all calculations + // ignore u,v,z b/c we don't need the scale factor to calc aspect ratio + for (i = 0; i < 3; i++) { + display_matrix[i][0] = get_be32(pb); // 16.16 fixed point + display_matrix[i][1] = get_be32(pb); // 16.16 fixed point + get_be32(pb); // 2.30 fixed point (not used) + } + width = get_be32(pb); // 16.16 fixed point track width + height = get_be32(pb); // 16.16 fixed point track height + sc->width = width >> 16; + sc->height = height >> 16; + + //transform the display width/height according to the matrix + // skip this if the display matrix is the default identity matrix + // to keep the same scale, use [width height 1<<16] + if (width && height && + (display_matrix[0][0] != 65536 || display_matrix[0][1] || + display_matrix[1][0] || display_matrix[1][1] != 65536 || + display_matrix[2][0] || display_matrix[2][1])) { + for (i = 0; i < 2; i++) + disp_transform[i] = + (int64_t) width * display_matrix[0][i] + + (int64_t) height * display_matrix[1][i] + + ((int64_t) display_matrix[2][i] << 16); + + //sample aspect ratio is new width/height divided by old width/height + st->sample_aspect_ratio = av_d2q( + ((double) disp_transform[0] * height) / + ((double) disp_transform[1] * width), INT_MAX); + } return 0; } -static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { MOVFragment *frag = &c->fragment; MOVTrackExt *trex = NULL; @@ -1418,7 +1589,7 @@ static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { MOVTrackExt *trex; @@ -1438,21 +1609,23 @@ static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return 0; } -static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { MOVFragment *frag = &c->fragment; - AVStream *st = c->fc->streams[frag->track_id-1]; - MOVStreamContext *sc = st->priv_data; + AVStream *st; + MOVStreamContext *sc; uint64_t offset; int64_t dts; int data_offset = 0; unsigned entries, first_sample_flags = frag->flags; int flags, distance, i; + if (!frag->track_id || frag->track_id > c->fc->nb_streams) + return -1; + st = c->fc->streams[frag->track_id-1]; + sc = st->priv_data; if (sc->pseudo_stream_id+1 != frag->stsd_id) return 0; - if (!st->nb_index_entries) - return -1; get_byte(pb); /* version */ flags = get_be24(pb); entries = get_be32(pb); @@ -1507,7 +1680,7 @@ static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) /* this atom should be null (from specs), but some buggy files put the 'moov' atom inside it... */ /* like the files created with Adobe Premiere 5.0, for samples see */ /* http://graphics.tudelft.nl/~wouter/publications/soundtests/ */ -static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { int err; @@ -1520,7 +1693,7 @@ static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) atom.type = get_le32(pb); atom.offset += 8; atom.size -= 8; - if (atom.type != MKTAG('m', 'd', 'a', 't')) { + if (atom.type != MKTAG('m','d','a','t')) { url_fskip(pb, atom.size); return 0; } @@ -1528,24 +1701,24 @@ static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) return err; } -static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { -#ifdef CONFIG_ZLIB +#if CONFIG_ZLIB ByteIOContext ctx; uint8_t *cmov_data; uint8_t *moov_data; /* uncompressed data */ long cmov_len, moov_len; - int ret; + int ret = -1; get_be32(pb); /* dcom atom */ - if (get_le32(pb) != MKTAG( 'd', 'c', 'o', 'm' )) + if (get_le32(pb) != MKTAG('d','c','o','m')) return -1; - if (get_le32(pb) != MKTAG( 'z', 'l', 'i', 'b' )) { + if (get_le32(pb) != MKTAG('z','l','i','b')) { av_log(NULL, AV_LOG_ERROR, "unknown compression for cmov atom !"); return -1; } get_be32(pb); /* cmvd atom */ - if (get_le32(pb) != MKTAG( 'c', 'm', 'v', 'd' )) + if (get_le32(pb) != MKTAG('c','m','v','d')) return -1; moov_len = get_be32(pb); /* uncompressed size */ cmov_len = atom.size - 6 * 4; @@ -1560,16 +1733,17 @@ static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } get_buffer(pb, cmov_data, cmov_len); if(uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK) - return -1; + goto free_and_return; if(init_put_byte(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0) - return -1; - atom.type = MKTAG( 'm', 'o', 'o', 'v' ); + goto free_and_return; + atom.type = MKTAG('m','o','o','v'); atom.offset = 0; atom.size = moov_len; #ifdef DEBUG // { int fd = open("/tmp/uncompheader.mov", O_WRONLY | O_CREAT); write(fd, moov_data, moov_len); close(fd); } #endif ret = mov_read_default(c, &ctx, atom); +free_and_return: av_free(moov_data); av_free(cmov_data); return ret; @@ -1580,70 +1754,89 @@ static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) } /* edit list atom */ -static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom) +static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOVAtom atom) { MOVStreamContext *sc = c->fc->streams[c->fc->nb_streams-1]->priv_data; int i, edit_count; get_byte(pb); /* version */ get_be24(pb); /* flags */ - edit_count= sc->edit_count = get_be32(pb); /* entries */ + edit_count = get_be32(pb); /* entries */ for(i=0; ifc, AV_LOG_WARNING, "edit list not starting at 0, " - "a/v desync might occur, patch welcome\n"); + if (i == 0 && time != -1) { + sc->time_offset = time; + sc->time_rate = av_gcd(sc->time_rate, time); + } } - dprintf(c->fc, "track[%i].edit_count = %i\n", c->fc->nb_streams-1, sc->edit_count); + + if(edit_count > 1) + av_log(c->fc, AV_LOG_WARNING, "multiple edit list entries, " + "a/v desync might occur, patch welcome\n"); + + dprintf(c->fc, "track[%i].edit_count = %i\n", c->fc->nb_streams-1, edit_count); return 0; } static const MOVParseTableEntry mov_default_parse_table[] = { -{ MKTAG( 'c', 'o', '6', '4' ), mov_read_stco }, -{ MKTAG( 'c', 't', 't', 's' ), mov_read_ctts }, /* composition time to sample */ -{ MKTAG( 'd', 'i', 'n', 'f' ), mov_read_default }, -{ MKTAG( 'd', 'r', 'e', 'f' ), mov_read_dref }, -{ MKTAG( 'e', 'd', 't', 's' ), mov_read_default }, -{ MKTAG( 'e', 'l', 's', 't' ), mov_read_elst }, -{ MKTAG( 'e', 'n', 'd', 'a' ), mov_read_enda }, -{ MKTAG( 'f', 'i', 'e', 'l' ), mov_read_extradata }, -{ MKTAG( 'f', 't', 'y', 'p' ), mov_read_ftyp }, -{ MKTAG( 'g', 'l', 'b', 'l' ), mov_read_glbl }, -{ MKTAG( 'h', 'd', 'l', 'r' ), mov_read_hdlr }, -{ MKTAG( 'j', 'p', '2', 'h' ), mov_read_extradata }, -{ MKTAG( 'm', 'd', 'a', 't' ), mov_read_mdat }, -{ MKTAG( 'm', 'd', 'h', 'd' ), mov_read_mdhd }, -{ MKTAG( 'm', 'd', 'i', 'a' ), mov_read_default }, -{ MKTAG( 'm', 'i', 'n', 'f' ), mov_read_default }, -{ MKTAG( 'm', 'o', 'o', 'f' ), mov_read_moof }, -{ MKTAG( 'm', 'o', 'o', 'v' ), mov_read_moov }, -{ MKTAG( 'm', 'v', 'e', 'x' ), mov_read_default }, -{ MKTAG( 'm', 'v', 'h', 'd' ), mov_read_mvhd }, -{ MKTAG( 'S', 'M', 'I', ' ' ), mov_read_smi }, /* Sorenson extension ??? */ -{ MKTAG( 'a', 'l', 'a', 'c' ), mov_read_extradata }, /* alac specific atom */ -{ MKTAG( 'a', 'v', 'c', 'C' ), mov_read_glbl }, -{ MKTAG( 's', 't', 'b', 'l' ), mov_read_default }, -{ MKTAG( 's', 't', 'c', 'o' ), mov_read_stco }, -{ MKTAG( 's', 't', 's', 'c' ), mov_read_stsc }, -{ MKTAG( 's', 't', 's', 'd' ), mov_read_stsd }, /* sample description */ -{ MKTAG( 's', 't', 's', 's' ), mov_read_stss }, /* sync sample */ -{ MKTAG( 's', 't', 's', 'z' ), mov_read_stsz }, /* sample size */ -{ MKTAG( 's', 't', 't', 's' ), mov_read_stts }, -{ MKTAG( 't', 'k', 'h', 'd' ), mov_read_tkhd }, /* track header */ -{ MKTAG( 't', 'f', 'h', 'd' ), mov_read_tfhd }, /* track fragment header */ -{ MKTAG( 't', 'r', 'a', 'k' ), mov_read_trak }, -{ MKTAG( 't', 'r', 'a', 'f' ), mov_read_default }, -{ MKTAG( 't', 'r', 'e', 'x' ), mov_read_trex }, -{ MKTAG( 't', 'r', 'u', 'n' ), mov_read_trun }, -{ MKTAG( 'u', 'd', 't', 'a' ), mov_read_udta }, -{ MKTAG( 'w', 'a', 'v', 'e' ), mov_read_wave }, -{ MKTAG( 'e', 's', 'd', 's' ), mov_read_esds }, -{ MKTAG( 'w', 'i', 'd', 'e' ), mov_read_wide }, /* place holder */ -{ MKTAG( 'c', 'm', 'o', 'v' ), mov_read_cmov }, +{ MKTAG('a','v','s','s'), mov_read_extradata }, +{ MKTAG('c','o','6','4'), mov_read_stco }, +{ MKTAG('c','t','t','s'), mov_read_ctts }, /* composition time to sample */ +{ MKTAG('d','i','n','f'), mov_read_default }, +{ MKTAG('d','r','e','f'), mov_read_dref }, +{ MKTAG('e','d','t','s'), mov_read_default }, +{ MKTAG('e','l','s','t'), mov_read_elst }, +{ MKTAG('e','n','d','a'), mov_read_enda }, +{ MKTAG('f','i','e','l'), mov_read_extradata }, +{ MKTAG('f','t','y','p'), mov_read_ftyp }, +{ MKTAG('g','l','b','l'), mov_read_glbl }, +{ MKTAG('h','d','l','r'), mov_read_hdlr }, +{ MKTAG('i','l','s','t'), mov_read_ilst }, +{ MKTAG('j','p','2','h'), mov_read_extradata }, +{ MKTAG('m','d','a','t'), mov_read_mdat }, +{ MKTAG('m','d','h','d'), mov_read_mdhd }, +{ MKTAG('m','d','i','a'), mov_read_default }, +{ MKTAG('m','e','t','a'), mov_read_meta }, +{ MKTAG('m','i','n','f'), mov_read_default }, +{ MKTAG('m','o','o','f'), mov_read_moof }, +{ MKTAG('m','o','o','v'), mov_read_moov }, +{ MKTAG('m','v','e','x'), mov_read_default }, +{ MKTAG('m','v','h','d'), mov_read_mvhd }, +{ MKTAG('S','M','I',' '), mov_read_smi }, /* Sorenson extension ??? */ +{ MKTAG('a','l','a','c'), mov_read_extradata }, /* alac specific atom */ +{ MKTAG('a','v','c','C'), mov_read_glbl }, +{ MKTAG('p','a','s','p'), mov_read_pasp }, +{ MKTAG('s','t','b','l'), mov_read_default }, +{ MKTAG('s','t','c','o'), mov_read_stco }, +{ MKTAG('s','t','s','c'), mov_read_stsc }, +{ MKTAG('s','t','s','d'), mov_read_stsd }, /* sample description */ +{ MKTAG('s','t','s','s'), mov_read_stss }, /* sync sample */ +{ MKTAG('s','t','s','z'), mov_read_stsz }, /* sample size */ +{ MKTAG('s','t','t','s'), mov_read_stts }, +{ MKTAG('t','k','h','d'), mov_read_tkhd }, /* track header */ +{ MKTAG('t','f','h','d'), mov_read_tfhd }, /* track fragment header */ +{ MKTAG('t','r','a','k'), mov_read_trak }, +{ MKTAG('t','r','a','f'), mov_read_default }, +{ MKTAG('t','r','e','x'), mov_read_trex }, +{ MKTAG('t','r','k','n'), mov_read_trkn }, +{ MKTAG('t','r','u','n'), mov_read_trun }, +{ MKTAG('u','d','t','a'), mov_read_default }, +{ MKTAG('w','a','v','e'), mov_read_wave }, +{ MKTAG('e','s','d','s'), mov_read_esds }, +{ MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */ +{ MKTAG('c','m','o','v'), mov_read_cmov }, +{ MKTAG(0xa9,'n','a','m'), mov_read_udta_string }, +{ MKTAG(0xa9,'w','r','t'), mov_read_udta_string }, +{ MKTAG(0xa9,'c','p','y'), mov_read_udta_string }, +{ MKTAG(0xa9,'i','n','f'), mov_read_udta_string }, +{ MKTAG(0xa9,'i','n','f'), mov_read_udta_string }, +{ MKTAG(0xa9,'A','R','T'), mov_read_udta_string }, +{ MKTAG(0xa9,'a','l','b'), mov_read_udta_string }, +{ MKTAG(0xa9,'c','m','t'), mov_read_udta_string }, { 0, NULL } }; @@ -1662,23 +1855,24 @@ static int mov_probe(AVProbeData *p) tag = AV_RL32(p->buf + offset + 4); switch(tag) { /* check for obvious tags */ - case MKTAG( 'j', 'P', ' ', ' ' ): /* jpeg 2000 signature */ - case MKTAG( 'm', 'o', 'o', 'v' ): - case MKTAG( 'm', 'd', 'a', 't' ): - case MKTAG( 'p', 'n', 'o', 't' ): /* detect movs with preview pics like ew.mov and april.mov */ - case MKTAG( 'u', 'd', 't', 'a' ): /* Packet Video PVAuthor adds this and a lot of more junk */ + case MKTAG('j','P',' ',' '): /* jpeg 2000 signature */ + case MKTAG('m','o','o','v'): + case MKTAG('m','d','a','t'): + case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */ + case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */ + case MKTAG('f','t','y','p'): return AVPROBE_SCORE_MAX; /* those are more common words, so rate then a bit less */ - case MKTAG( 'e', 'd', 'i', 'w' ): /* xdcam files have reverted first tags */ - case MKTAG( 'w', 'i', 'd', 'e' ): - case MKTAG( 'f', 'r', 'e', 'e' ): - case MKTAG( 'j', 'u', 'n', 'k' ): - case MKTAG( 'p', 'i', 'c', 't' ): + case MKTAG('e','d','i','w'): /* xdcam files have reverted first tags */ + case MKTAG('w','i','d','e'): + case MKTAG('f','r','e','e'): + case MKTAG('j','u','n','k'): + case MKTAG('p','i','c','t'): return AVPROBE_SCORE_MAX - 5; - case MKTAG(0x82,0x82,0x7f,0x7d ): - case MKTAG( 'f', 't', 'y', 'p' ): - case MKTAG( 's', 'k', 'i', 'p' ): - case MKTAG( 'u', 'u', 'i', 'd' ): + case MKTAG(0x82,0x82,0x7f,0x7d): + case MKTAG('s','k','i','p'): + case MKTAG('u','u','i','d'): + case MKTAG('p','r','f','l'): offset = AV_RB32(p->buf+offset) + offset; /* if we only find those cause probedata is too small at least rate them */ score = AVPROBE_SCORE_MAX - 50; @@ -1696,7 +1890,7 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap) MOVContext *mov = s->priv_data; ByteIOContext *pb = s->pb; int err; - MOV_atom_t atom = { 0, 0, 0 }; + MOVAtom atom = { 0, 0, 0 }; mov->fc = s; /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */ @@ -1706,13 +1900,15 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap) atom.size = INT64_MAX; /* check MOV header */ - err = mov_read_default(mov, pb, atom); - if (err<0 || (!mov->found_moov && !mov->found_mdat)) { - av_log(s, AV_LOG_ERROR, "mov: header not found !!! (err:%d, moov:%d, mdat:%d) pos:%"PRId64"\n", - err, mov->found_moov, mov->found_mdat, url_ftell(pb)); + if ((err = mov_read_default(mov, pb, atom)) < 0) { + av_log(s, AV_LOG_ERROR, "error reading header: %d\n", err); + return err; + } + if (!mov->found_moov) { + av_log(s, AV_LOG_ERROR, "moov atom not found\n"); return -1; } - dprintf(mov->fc, "on_parse_exit_offset=%d\n", (int) url_ftell(pb)); + dprintf(mov->fc, "on_parse_exit_offset=%lld\n", url_ftell(pb)); return 0; } @@ -1747,7 +1943,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt) if (!sample) { mov->found_mdat = 0; if (!url_is_streamed(s->pb) || - mov_read_default(mov, s->pb, (MOV_atom_t){ 0, 0, INT64_MAX }) < 0 || + mov_read_default(mov, s->pb, (MOVAtom){ 0, 0, INT64_MAX }) < 0 || url_feof(s->pb)) return -1; dprintf(s, "read fragments, offset 0x%llx\n", url_ftell(s->pb)); @@ -1761,7 +1957,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt) return -1; } av_get_packet(sc->pb, pkt, sample->size); -#ifdef CONFIG_DV_DEMUXER +#if CONFIG_DV_DEMUXER if (mov->dv_demux && sc->dv_audio_container) { dv_produce_packet(mov->dv_demux, pkt, pkt->data, pkt->size); av_free(pkt->data); @@ -1773,16 +1969,22 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt) pkt->stream_index = sc->ffindex; pkt->dts = sample->timestamp; if (sc->ctts_data) { - assert(sc->ctts_data[sc->sample_to_ctime_index].duration % sc->time_rate == 0); - pkt->pts = pkt->dts + sc->ctts_data[sc->sample_to_ctime_index].duration / sc->time_rate; + assert(sc->ctts_data[sc->ctts_index].duration % sc->time_rate == 0); + pkt->pts = pkt->dts + sc->ctts_data[sc->ctts_index].duration / sc->time_rate; /* update ctts context */ - sc->sample_to_ctime_sample++; - if (sc->sample_to_ctime_index < sc->ctts_count && - sc->ctts_data[sc->sample_to_ctime_index].count == sc->sample_to_ctime_sample) { - sc->sample_to_ctime_index++; - sc->sample_to_ctime_sample = 0; + sc->ctts_sample++; + if (sc->ctts_index < sc->ctts_count && + sc->ctts_data[sc->ctts_index].count == sc->ctts_sample) { + sc->ctts_index++; + sc->ctts_sample = 0; } + if (sc->wrong_dts) + pkt->dts = AV_NOPTS_VALUE; } else { + AVStream *st = s->streams[sc->ffindex]; + int64_t next_dts = (sc->current_sample < sc->sample_count) ? + st->index_entries[sc->current_sample].timestamp : st->duration; + pkt->duration = next_dts - pkt->dts; pkt->pts = pkt->dts; } pkt->flags |= sample->flags & AVINDEX_KEYFRAME ? PKT_FLAG_KEY : 0; @@ -1810,8 +2012,8 @@ static int mov_seek_stream(AVStream *st, int64_t timestamp, int flags) for (i = 0; i < sc->ctts_count; i++) { int next = time_sample + sc->ctts_data[i].count; if (next > sc->current_sample) { - sc->sample_to_ctime_index = i; - sc->sample_to_ctime_sample = sc->current_sample - time_sample; + sc->ctts_index = i; + sc->ctts_sample = sc->current_sample - time_sample; break; } time_sample = next; @@ -1829,6 +2031,8 @@ static int mov_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti if (stream_index >= s->nb_streams) return -1; + if (sample_time < 0) + sample_time = 0; st = s->streams[stream_index]; sample = mov_seek_stream(st, sample_time, flags); @@ -1876,7 +2080,7 @@ static int mov_read_close(AVFormatContext *s) AVInputFormat mov_demuxer = { "mov,mp4,m4a,3gp,3g2,mj2", - "QuickTime/MPEG4/Motion JPEG 2000 format", + NULL_IF_CONFIG_SMALL("QuickTime/MPEG-4/Motion JPEG 2000 format"), sizeof(MOVContext), mov_probe, mov_read_header,