X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavformat%2Fmatroskadec.c;h=954ef3b863568bf2d843b3c66dbbfd3fba1ed740;hb=9d3fdf2031403301f25d7f5b4f5c323ba95139ee;hp=442a5a013afacd13d22f523c201986ed4b372950;hpb=21a115d17bafa1c07f3959af3f6726eb1cb27bc8;p=ffmpeg diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 442a5a013af..954ef3b8635 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -20,7 +20,7 @@ */ /** - * @file matroskadec.c + * @file * Matroska file demuxer * by Ronald Bultje * with a little help from Moritz Bunkus @@ -30,18 +30,21 @@ #include #include "avformat.h" -/* For codec_get_id(). */ +#include "internal.h" +/* For ff_codec_get_id(). */ #include "riff.h" #include "isom.h" +#include "rm.h" #include "matroska.h" #include "libavcodec/mpeg4audio.h" #include "libavutil/intfloat_readwrite.h" +#include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" #include "libavutil/lzo.h" -#ifdef CONFIG_ZLIB +#if CONFIG_ZLIB #include #endif -#ifdef CONFIG_BZLIB +#if CONFIG_BZLIB #include #endif @@ -127,25 +130,32 @@ typedef struct { typedef struct { uint64_t num; + uint64_t uid; uint64_t type; + char *name; char *codec_id; EbmlBin codec_priv; char *language; double time_scale; uint64_t default_duration; uint64_t flag_default; + uint64_t flag_forced; MatroskaTrackVideo video; MatroskaTrackAudio audio; EbmlList encodings; AVStream *stream; int64_t end_timecode; + int ms_compat; } MatroskaTrack; typedef struct { + uint64_t uid; char *filename; char *mime; EbmlBin bin; + + AVStream *stream; } MatroskaAttachement; typedef struct { @@ -153,6 +163,8 @@ typedef struct { uint64_t end; uint64_t uid; char *title; + + AVChapter *chapter; } MatroskaChapter; typedef struct { @@ -168,9 +180,24 @@ typedef struct { typedef struct { char *name; char *string; + char *lang; + uint64_t def; EbmlList sub; } MatroskaTag; +typedef struct { + char *type; + uint64_t typevalue; + uint64_t trackuid; + uint64_t chapteruid; + uint64_t attachuid; +} MatroskaTagTarget; + +typedef struct { + MatroskaTagTarget target; + EbmlList tag; +} MatroskaTags; + typedef struct { uint64_t id; uint64_t pos; @@ -188,6 +215,7 @@ typedef struct { int num_levels; MatroskaLevel levels[EBML_MAX_DEPTH]; int level_up; + uint32_t current_id; uint64_t time_scale; double duration; @@ -200,7 +228,7 @@ typedef struct { EbmlList seekhead; /* byte position of the segment inside the stream */ - offset_t segment_start; + int64_t segment_start; /* the packet queue */ AVPacket **packets; @@ -208,7 +236,6 @@ typedef struct { AVPacket *prev_pkt; int done; - int has_cluster_id; /* What to skip before effectively reading a packet. */ int skip_to_keyframe; @@ -218,6 +245,7 @@ typedef struct { typedef struct { uint64_t duration; int64_t reference; + uint64_t non_simple; EbmlBin bin; } MatroskaBlock; @@ -226,8 +254,6 @@ typedef struct { EbmlList blocks; } MatroskaCluster; -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(*x)) - static EbmlSyntax ebml_header[] = { { EBML_ID_EBMLREADVERSION, EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} }, { EBML_ID_EBMLMAXSIZELENGTH, EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} }, @@ -302,6 +328,8 @@ static EbmlSyntax matroska_track_encodings[] = { static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKNUMBER, EBML_UINT, 0, offsetof(MatroskaTrack,num) }, + { MATROSKA_ID_TRACKNAME, EBML_UTF8, 0, offsetof(MatroskaTrack,name) }, + { MATROSKA_ID_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTrack,uid) }, { MATROSKA_ID_TRACKTYPE, EBML_UINT, 0, offsetof(MatroskaTrack,type) }, { MATROSKA_ID_CODECID, EBML_STR, 0, offsetof(MatroskaTrack,codec_id) }, { MATROSKA_ID_CODECPRIVATE, EBML_BIN, 0, offsetof(MatroskaTrack,codec_priv) }, @@ -309,13 +337,11 @@ static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) }, { MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} }, { MATROSKA_ID_TRACKFLAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} }, + { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} }, { MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} }, { MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} }, { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} }, - { MATROSKA_ID_TRACKUID, EBML_NONE }, - { MATROSKA_ID_TRACKNAME, EBML_NONE }, { MATROSKA_ID_TRACKFLAGENABLED, EBML_NONE }, - { MATROSKA_ID_TRACKFLAGFORCED, EBML_NONE }, { MATROSKA_ID_TRACKFLAGLACING, EBML_NONE }, { MATROSKA_ID_CODECNAME, EBML_NONE }, { MATROSKA_ID_CODECDECODEALL, EBML_NONE }, @@ -333,11 +359,11 @@ static EbmlSyntax matroska_tracks[] = { }; static EbmlSyntax matroska_attachment[] = { + { MATROSKA_ID_FILEUID, EBML_UINT, 0, offsetof(MatroskaAttachement,uid) }, { MATROSKA_ID_FILENAME, EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) }, { MATROSKA_ID_FILEMIMETYPE, EBML_STR, 0, offsetof(MatroskaAttachement,mime) }, { MATROSKA_ID_FILEDATA, EBML_BIN, 0, offsetof(MatroskaAttachement,bin) }, { MATROSKA_ID_FILEDESC, EBML_NONE }, - { MATROSKA_ID_FILEUID, EBML_NONE }, { 0 } }; @@ -399,20 +425,30 @@ static EbmlSyntax matroska_index[] = { static EbmlSyntax matroska_simpletag[] = { { MATROSKA_ID_TAGNAME, EBML_UTF8, 0, offsetof(MatroskaTag,name) }, { MATROSKA_ID_TAGSTRING, EBML_UTF8, 0, offsetof(MatroskaTag,string) }, + { MATROSKA_ID_TAGLANG, EBML_STR, 0, offsetof(MatroskaTag,lang), {.s="und"} }, + { MATROSKA_ID_TAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTag,def) }, + { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0, offsetof(MatroskaTag,def) }, { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} }, - { MATROSKA_ID_TAGLANG, EBML_NONE }, - { MATROSKA_ID_TAGDEFAULT, EBML_NONE }, + { 0 } +}; + +static EbmlSyntax matroska_tagtargets[] = { + { MATROSKA_ID_TAGTARGETS_TYPE, EBML_STR, 0, offsetof(MatroskaTagTarget,type) }, + { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} }, + { MATROSKA_ID_TAGTARGETS_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) }, + { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) }, + { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) }, { 0 } }; static EbmlSyntax matroska_tag[] = { - { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), 0, {.n=matroska_simpletag} }, - { MATROSKA_ID_TAGTARGETS, EBML_NONE }, + { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} }, + { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} }, { 0 } }; static EbmlSyntax matroska_tags[] = { - { MATROSKA_ID_TAG, EBML_NEST, 0, offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} }, + { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} }, { 0 } }; @@ -435,7 +471,7 @@ static EbmlSyntax matroska_segment[] = { { MATROSKA_ID_CUES, EBML_NEST, 0, 0, {.n=matroska_index } }, { MATROSKA_ID_TAGS, EBML_NEST, 0, 0, {.n=matroska_tags } }, { MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, {.n=matroska_seekhead } }, - { MATROSKA_ID_CLUSTER, EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) }, + { MATROSKA_ID_CLUSTER, EBML_STOP }, { 0 } }; @@ -449,6 +485,7 @@ static EbmlSyntax matroska_blockgroup[] = { { MATROSKA_ID_SIMPLEBLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) }, { MATROSKA_ID_BLOCKDURATION, EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} }, { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) }, + { 1, EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} }, { 0 } }; @@ -470,24 +507,7 @@ static EbmlSyntax matroska_clusters[] = { { 0 } }; -#define SIZE_OFF(x) sizeof(((AVFormatContext*)0)->x),offsetof(AVFormatContext,x) -const struct { - const char name[16]; - int size; - int offset; -} metadata[] = { - { "TITLE", SIZE_OFF(title) }, - { "ARTIST", SIZE_OFF(author) }, - { "WRITTEN_BY", SIZE_OFF(author) }, - { "LEAD_PERFORMER", SIZE_OFF(author) }, - { "COPYRIGHT", SIZE_OFF(copyright) }, - { "COMMENT", SIZE_OFF(comment) }, - { "ALBUM", SIZE_OFF(album) }, - { "DATE_WRITTEN", SIZE_OFF(year) }, - { "DATE_RELEASED", SIZE_OFF(year) }, - { "PART_NUMBER", SIZE_OFF(track) }, - { "GENRE", SIZE_OFF(genre) }, -}; +static const char *matroska_doctypes[] = { "matroska", "webm" }; /* * Return: Whether we reached the end of a level in the hierarchy or not. @@ -495,11 +515,11 @@ const struct { static int ebml_level_end(MatroskaDemuxContext *matroska) { ByteIOContext *pb = matroska->ctx->pb; - offset_t pos = url_ftell(pb); + int64_t pos = url_ftell(pb); if (matroska->num_levels > 0) { MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1]; - if (pos - level->start >= level->length) { + if (pos - level->start >= level->length || matroska->current_id) { matroska->num_levels--; return 1; } @@ -527,7 +547,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, if (!(total = get_byte(pb))) { /* we might encounter EOS here */ if (!url_feof(pb)) { - offset_t pos = url_ftell(pb); + int64_t pos = url_ftell(pb); av_log(matroska->ctx, AV_LOG_ERROR, "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos); @@ -541,7 +561,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, len_mask >>= 1; } if (read > max_size) { - offset_t pos = url_ftell(pb) - 1; + int64_t pos = url_ftell(pb) - 1; av_log(matroska->ctx, AV_LOG_ERROR, "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n", (uint8_t) total, pos, pos); @@ -605,7 +625,7 @@ static int ebml_read_ascii(ByteIOContext *pb, int size, char **str) if (!(*str = av_malloc(size + 1))) return AVERROR(ENOMEM); if (get_buffer(pb, (uint8_t *) *str, size) != size) { - av_free(*str); + av_freep(str); return AVERROR(EIO); } (*str)[size] = '\0'; @@ -625,8 +645,10 @@ static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin) bin->size = length; bin->pos = url_ftell(pb); - if (get_buffer(pb, bin->data, length) != length) + if (get_buffer(pb, bin->data, length) != length) { + av_freep(&bin->data); return AVERROR(EIO); + } return 0; } @@ -636,7 +658,7 @@ static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin) * are supposed to be sub-elements which can be read separately. * 0 is success, < 0 is failure. */ -static int ebml_read_master(MatroskaDemuxContext *matroska, int length) +static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length) { ByteIOContext *pb = matroska->ctx->pb; MatroskaLevel *level; @@ -663,7 +685,7 @@ static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska, { ByteIOContext pb; init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL); - return ebml_read_num(matroska, &pb, 8, num); + return ebml_read_num(matroska, &pb, FFMIN(size, 8), num); } /* @@ -695,6 +717,10 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, for (i=0; syntax[i].id; i++) if (id == syntax[i].id) break; + if (!syntax[i].id && id == MATROSKA_ID_CLUSTER && + matroska->num_levels > 0 && + matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff) + return 0; // we reached the end of an unknown size cluster if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id); return ebml_parse_elem(matroska, &syntax[i], data); @@ -703,10 +729,14 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, void *data) { - uint64_t id; - int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id); - id |= 1 << 7*res; - return res < 0 ? res : ebml_parse_id(matroska, syntax, id, data); + if (!matroska->current_id) { + uint64_t id; + int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id); + if (res < 0) + return res; + matroska->current_id = id | 1 << 7*res; + } + return ebml_parse_id(matroska, syntax, matroska->current_id, data); } static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, @@ -751,9 +781,11 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, list->nb_elem++; } - if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) + if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) { + matroska->current_id = 0; if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0) return res; + } switch (syntax->type) { case EBML_UINT: res = ebml_read_uint (pb, length, data); break; @@ -767,7 +799,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, matroska->segment_start = url_ftell(matroska->ctx->pb); return ebml_parse_nest(matroska, syntax->def.n, data); case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data); - case EBML_STOP: *(int *)data = 1; return 1; + case EBML_STOP: return 1; default: return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0; } if (res == AVERROR_INVALIDDATA) @@ -807,8 +839,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data) static int matroska_probe(AVProbeData *p) { uint64_t total = 0; - int len_mask = 0x80, size = 1, n = 1; - static const char probe_data[] = "matroska"; + int len_mask = 0x80, size = 1, n = 1, i; /* EBML header? */ if (AV_RB32(p->buf) != EBML_ID_HEADER) @@ -830,15 +861,19 @@ static int matroska_probe(AVProbeData *p) if (p->buf_size < 4 + size + total) return 0; - /* The header must contain the document type 'matroska'. For now, + /* The header should contain a known document type. For now, * we don't parse the whole header but simply check for the * availability of that array of characters inside the header. * Not fully fool-proof, but good enough. */ - for (n = 4+size; n <= 4+size+total-(sizeof(probe_data)-1); n++) - if (!memcmp(p->buf+n, probe_data, sizeof(probe_data)-1)) - return AVPROBE_SCORE_MAX; + for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) { + int probelen = strlen(matroska_doctypes[i]); + for (n = 4+size; n <= 4+size+total-probelen; n++) + if (!memcmp(p->buf+n, matroska_doctypes[i], probelen)) + return AVPROBE_SCORE_MAX; + } - return 0; + // probably valid EBML header but no recognized doctype + return AVPROBE_SCORE_MAX/2; } static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska, @@ -872,15 +907,14 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size, case MATROSKA_TRACK_ENCODING_COMP_LZO: do { olen = pkt_size *= 3; - pkt_data = av_realloc(pkt_data, - pkt_size+LZO_OUTPUT_PADDING); - result = lzo1x_decode(pkt_data, &olen, data, &isize); - } while (result==LZO_OUTPUT_FULL && pkt_size<10000000); + pkt_data = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING); + result = av_lzo1x_decode(pkt_data, &olen, data, &isize); + } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000); if (result) goto failed; pkt_size -= olen; break; -#ifdef CONFIG_ZLIB +#if CONFIG_ZLIB case MATROSKA_TRACK_ENCODING_COMP_ZLIB: { z_stream zstream = {0}; if (inflateInit(&zstream) != Z_OK) @@ -901,7 +935,7 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size, break; } #endif -#ifdef CONFIG_BZLIB +#if CONFIG_BZLIB case MATROSKA_TRACK_ENCODING_COMP_BZLIB: { bz_stream bzstream = {0}; if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK) @@ -974,24 +1008,61 @@ static void matroska_merge_packets(AVPacket *out, AVPacket *in) av_free(in); } -static void matroska_convert_tags(AVFormatContext *s, EbmlList *list) +static void matroska_convert_tag(AVFormatContext *s, EbmlList *list, + AVMetadata **metadata, char *prefix) { MatroskaTag *tags = list->elem; - int i, j; + char key[1024]; + int i; for (i=0; i < list->nb_elem; i++) { - for (j=0; j < ARRAY_SIZE(metadata); j++){ - if (!strcmp(tags[i].name, metadata[j].name)) { - int *ptr = (int *)((char *)s + metadata[j].offset); - if (*ptr) continue; - if (metadata[j].size > sizeof(int)) - av_strlcpy((char *)ptr, tags[i].string, metadata[j].size); - else - *ptr = atoi(tags[i].string); - } - } + const char *lang = strcmp(tags[i].lang, "und") ? tags[i].lang : NULL; + if (prefix) snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name); + else av_strlcpy(key, tags[i].name, sizeof(key)); + if (tags[i].def || !lang) { + av_metadata_set2(metadata, key, tags[i].string, 0); if (tags[i].sub.nb_elem) - matroska_convert_tags(s, &tags[i].sub); + matroska_convert_tag(s, &tags[i].sub, metadata, key); + } + if (lang) { + av_strlcat(key, "-", sizeof(key)); + av_strlcat(key, lang, sizeof(key)); + av_metadata_set2(metadata, key, tags[i].string, 0); + if (tags[i].sub.nb_elem) + matroska_convert_tag(s, &tags[i].sub, metadata, key); + } + } +} + +static void matroska_convert_tags(AVFormatContext *s) +{ + MatroskaDemuxContext *matroska = s->priv_data; + MatroskaTags *tags = matroska->tags.elem; + int i, j; + + for (i=0; i < matroska->tags.nb_elem; i++) { + if (tags[i].target.attachuid) { + MatroskaAttachement *attachment = matroska->attachments.elem; + for (j=0; jattachments.nb_elem; j++) + if (attachment[j].uid == tags[i].target.attachuid) + matroska_convert_tag(s, &tags[i].tag, + &attachment[j].stream->metadata, NULL); + } else if (tags[i].target.chapteruid) { + MatroskaChapter *chapter = matroska->chapters.elem; + for (j=0; jchapters.nb_elem; j++) + if (chapter[j].uid == tags[i].target.chapteruid) + matroska_convert_tag(s, &tags[i].tag, + &chapter[j].chapter->metadata, NULL); + } else if (tags[i].target.trackuid) { + MatroskaTrack *track = matroska->tracks.elem; + for (j=0; jtracks.nb_elem; j++) + if (track[j].uid == tags[i].target.trackuid) + matroska_convert_tag(s, &tags[i].tag, + &track[j].stream->metadata, NULL); + } else { + matroska_convert_tag(s, &tags[i].tag, &s->metadata, + tags[i].target.type); + } } } @@ -1000,12 +1071,18 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) EbmlList *seekhead_list = &matroska->seekhead; MatroskaSeekhead *seekhead = seekhead_list->elem; uint32_t level_up = matroska->level_up; - offset_t before_pos = url_ftell(matroska->ctx->pb); + int64_t before_pos = url_ftell(matroska->ctx->pb); + uint32_t saved_id = matroska->current_id; MatroskaLevel level; int i; + // we should not do any seeking in the streaming case + if (url_is_streamed(matroska->ctx->pb) || + (matroska->ctx->flags & AVFMT_FLAG_IGNIDX)) + return; + for (i=0; inb_elem; i++) { - offset_t offset = seekhead[i].pos + matroska->segment_start; + int64_t offset = seekhead[i].pos + matroska->segment_start; if (seekhead[i].pos <= before_pos || seekhead[i].id == MATROSKA_ID_SEEKHEAD @@ -1029,6 +1106,7 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) level.length = (uint64_t)-1; matroska->levels[matroska->num_levels] = level; matroska->num_levels++; + matroska->current_id = 0; ebml_parse(matroska, matroska_segment, matroska); @@ -1043,6 +1121,7 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) /* seek back */ url_fseek(matroska->ctx->pb, before_pos, SEEK_SET); matroska->level_up = level_up; + matroska->current_id = saved_id; } static int matroska_aac_profile(char *codec_id) @@ -1050,7 +1129,7 @@ static int matroska_aac_profile(char *codec_id) static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" }; int profile; - for (profile=0; profilectx = s; /* First read the EBML header. */ if (ebml_parse(matroska, ebml_syntax, &ebml) || ebml.version > EBML_VERSION || ebml.max_size > sizeof(uint64_t) - || ebml.id_length > sizeof(uint32_t) || strcmp(ebml.doctype, "matroska") - || ebml.doctype_version > 2) { + || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) { av_log(matroska->ctx, AV_LOG_ERROR, "EBML header using unsupported features\n" "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n", ebml.version, ebml.doctype, ebml.doctype_version); - return AVERROR_NOFMT; + ebml_free(ebml_syntax, &ebml); + return AVERROR_PATCHWELCOME; + } + for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) + if (!strcmp(ebml.doctype, matroska_doctypes[i])) + break; + if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) { + av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype); } ebml_free(ebml_syntax, &ebml); /* The next thing is a segment. */ - if (ebml_parse(matroska, matroska_segments, matroska) < 0) - return -1; + if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0) + return res; matroska_execute_seekhead(matroska); + if (!matroska->time_scale) + matroska->time_scale = 1000000; if (matroska->duration) matroska->ctx->duration = matroska->duration * matroska->time_scale * 1000 / AV_TIME_BASE; - if (matroska->title) - strncpy(matroska->ctx->title, matroska->title, - sizeof(matroska->ctx->title)-1); - matroska_convert_tags(s, &matroska->tags); + av_metadata_set2(&s->metadata, "title", matroska->title, 0); tracks = matroska->tracks.elem; for (i=0; i < matroska->tracks.nb_elem; i++) { @@ -1117,6 +1203,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) uint8_t *extradata = NULL; int extradata_size = 0; int extradata_offset = 0; + ByteIOContext b; /* Apply some sanity checks. */ if (track->type != MATROSKA_TRACK_TYPE_VIDEO && @@ -1147,10 +1234,10 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else if (encodings_list->nb_elem == 1) { if (encodings[0].type || (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP && -#ifdef CONFIG_ZLIB +#if CONFIG_ZLIB encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB && #endif -#ifdef CONFIG_BZLIB +#if CONFIG_BZLIB encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB && #endif encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) { @@ -1195,18 +1282,23 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC") && track->codec_priv.size >= 40 && track->codec_priv.data != NULL) { + track->ms_compat = 1; track->video.fourcc = AV_RL32(track->codec_priv.data + 16); - codec_id = codec_get_id(codec_bmp_tags, track->video.fourcc); + codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc); + extradata_offset = 40; } else if (!strcmp(track->codec_id, "A_MS/ACM") - && track->codec_priv.size >= 18 + && track->codec_priv.size >= 14 && track->codec_priv.data != NULL) { - uint16_t tag = AV_RL16(track->codec_priv.data); - codec_id = codec_get_id(codec_wav_tags, tag); + init_put_byte(&b, track->codec_priv.data, track->codec_priv.size, + URL_RDONLY, NULL, NULL, NULL, NULL); + ff_get_wav_header(&b, st->codec, track->codec_priv.size); + codec_id = st->codec->codec_id; + extradata_offset = FFMIN(track->codec_priv.size, 18); } else if (!strcmp(track->codec_id, "V_QUICKTIME") && (track->codec_priv.size >= 86) && (track->codec_priv.data != NULL)) { track->video.fourcc = AV_RL32(track->codec_priv.data); - codec_id=codec_get_id(codec_movvideo_tags, track->video.fourcc); + codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc); } else if (codec_id == CODEC_ID_PCM_S16BE) { switch (track->audio.bitdepth) { case 8: codec_id = CODEC_ID_PCM_U8; break; @@ -1238,7 +1330,6 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else extradata_size = 2; } else if (codec_id == CODEC_ID_TTA) { - ByteIOContext b; extradata_size = 30; extradata = av_mallocz(extradata_size); if (extradata == NULL) @@ -1254,17 +1345,16 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 || codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) { extradata_offset = 26; - track->codec_priv.size -= extradata_offset; } else if (codec_id == CODEC_ID_RA_144) { track->audio.out_samplerate = 8000; track->audio.channels = 1; } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK || - codec_id == CODEC_ID_ATRAC3) { - ByteIOContext b; - + codec_id == CODEC_ID_ATRAC3 || codec_id == CODEC_ID_SIPR) { + int flavor; init_put_byte(&b, track->codec_priv.data,track->codec_priv.size, 0, NULL, NULL, NULL, NULL); - url_fskip(&b, 24); + url_fskip(&b, 22); + flavor = get_be16(&b); track->audio.coded_framesize = get_be32(&b); url_fskip(&b, 12); track->audio.sub_packet_h = get_be16(&b); @@ -1275,11 +1365,16 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->block_align = track->audio.coded_framesize; track->codec_priv.size = 0; } else { + if (codec_id == CODEC_ID_SIPR && flavor < 4) { + const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 }; + track->audio.sub_packet_size = ff_sipr_subpk_size[flavor]; + st->codec->bit_rate = sipr_bit_rate[flavor]; + } st->codec->block_align = track->audio.sub_packet_size; extradata_offset = 78; - track->codec_priv.size -= extradata_offset; } } + track->codec_priv.size -= extradata_offset; if (codec_id == CODEC_ID_NONE) av_log(matroska->ctx, AV_LOG_INFO, @@ -1292,30 +1387,36 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->codec_id = codec_id; st->start_time = 0; if (strcmp(track->language, "und")) - av_strlcpy(st->language, track->language, 4); + av_metadata_set2(&st->metadata, "language", track->language, 0); + av_metadata_set2(&st->metadata, "title", track->name, 0); if (track->flag_default) st->disposition |= AV_DISPOSITION_DEFAULT; + if (track->flag_forced) + st->disposition |= AV_DISPOSITION_FORCED; if (track->default_duration) av_reduce(&st->codec->time_base.num, &st->codec->time_base.den, track->default_duration, 1000000000, 30000); - if(extradata){ - st->codec->extradata = extradata; - st->codec->extradata_size = extradata_size; - } else if(track->codec_priv.data && track->codec_priv.size > 0){ - st->codec->extradata = av_malloc(track->codec_priv.size); - if(st->codec->extradata == NULL) - return AVERROR(ENOMEM); - st->codec->extradata_size = track->codec_priv.size; - memcpy(st->codec->extradata, - track->codec_priv.data + extradata_offset, - track->codec_priv.size); + if (!st->codec->extradata) { + if(extradata){ + st->codec->extradata = extradata; + st->codec->extradata_size = extradata_size; + } else if(track->codec_priv.data && track->codec_priv.size > 0){ + st->codec->extradata = av_mallocz(track->codec_priv.size + + FF_INPUT_BUFFER_PADDING_SIZE); + if(st->codec->extradata == NULL) + return AVERROR(ENOMEM); + st->codec->extradata_size = track->codec_priv.size; + memcpy(st->codec->extradata, + track->codec_priv.data + extradata_offset, + track->codec_priv.size); + } } if (track->type == MATROSKA_TRACK_TYPE_VIDEO) { - st->codec->codec_type = CODEC_TYPE_VIDEO; + st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_tag = track->video.fourcc; st->codec->width = track->video.pixel_width; st->codec->height = track->video.pixel_height; @@ -1324,13 +1425,18 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->height * track->video.display_width, st->codec-> width * track->video.display_height, 255); + if (st->codec->codec_id != CODEC_ID_H264) st->need_parsing = AVSTREAM_PARSE_HEADERS; + if (track->default_duration) + st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX); } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) { - st->codec->codec_type = CODEC_TYPE_AUDIO; + st->codec->codec_type = AVMEDIA_TYPE_AUDIO; st->codec->sample_rate = track->audio.out_samplerate; st->codec->channels = track->audio.channels; + if (st->codec->codec_id != CODEC_ID_AAC) + st->need_parsing = AVSTREAM_PARSE_HEADERS; } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) { - st->codec->codec_type = CODEC_TYPE_SUBTITLE; + st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; } } @@ -1343,9 +1449,9 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) AVStream *st = av_new_stream(s, 0); if (st == NULL) break; - st->filename = av_strdup(attachements[j].filename); + av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0); st->codec->codec_id = CODEC_ID_NONE; - st->codec->codec_type = CODEC_TYPE_ATTACHMENT; + st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT; st->codec->extradata = av_malloc(attachements[j].bin.size); if(st->codec->extradata == NULL) break; @@ -1359,18 +1465,30 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) break; } } + attachements[j].stream = st; } } chapters = chapters_list->elem; for (i=0; inb_elem; i++) - if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid) + if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid + && (max_start==0 || chapters[i].start > max_start)) { + chapters[i].chapter = ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000}, chapters[i].start, chapters[i].end, chapters[i].title); + av_metadata_set2(&chapters[i].chapter->metadata, + "title", chapters[i].title, 0); + max_start = chapters[i].start; + } index_list = &matroska->index; index = index_list->elem; + if (index_list->nb_elem + && index[0].time > 100000000000000/matroska->time_scale) { + av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n"); + index_scale = matroska->time_scale; + } for (i=0; inb_elem; i++) { EbmlList *pos_list = &index[i].pos; MatroskaIndexPos *pos = pos_list->elem; @@ -1380,10 +1498,13 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) if (track && track->stream) av_add_index_entry(track->stream, pos[j].pos + matroska->segment_start, - index[i].time, 0, 0, AVINDEX_KEYFRAME); + index[i].time/index_scale, 0, 0, + AVINDEX_KEYFRAME); } } + matroska_convert_tags(s); + return 0; } @@ -1468,7 +1589,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, flags = *data++; size -= 3; if (is_keyframe == -1) - is_keyframe = flags & 0x80 ? PKT_FLAG_KEY : 0; + is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0; if (cluster_time != (uint64_t)-1 && (block_time >= 0 || cluster_time >= -block_time)) { @@ -1565,9 +1686,11 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (res == 0) { for (n = 0; n < laces; n++) { - if (st->codec->codec_id == CODEC_ID_RA_288 || - st->codec->codec_id == CODEC_ID_COOK || - st->codec->codec_id == CODEC_ID_ATRAC3) { + if ((st->codec->codec_id == CODEC_ID_RA_288 || + st->codec->codec_id == CODEC_ID_COOK || + st->codec->codec_id == CODEC_ID_SIPR || + st->codec->codec_id == CODEC_ID_ATRAC3) && + st->codec->block_align && track->audio.sub_packet_size) { int a = st->codec->block_align; int sps = track->audio.sub_packet_size; int cfs = track->audio.coded_framesize; @@ -1581,11 +1704,15 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, for (x=0; xaudio.buf+x*2*w+y*cfs, data+x*cfs, cfs); + else if (st->codec->codec_id == CODEC_ID_SIPR) + memcpy(track->audio.buf + y*w, data, w); else for (x=0; xaudio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps); if (++track->audio.sub_packet_cnt >= h) { + if (st->codec->codec_id == CODEC_ID_SIPR) + ff_rm_reorder_sipr_data(track->audio.buf, h, w); track->audio.sub_packet_cnt = 0; track->audio.pkt_cnt = h*w / a; } @@ -1604,6 +1731,11 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, int offset = 0, pkt_size = lace_size[n]; uint8_t *pkt_data = data; + if (lace_size[n] > size) { + av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n"); + break; + } + if (encodings && encodings->scope & 1) { offset = matroska_decode_buffer(&pkt_data,&pkt_size, track); if (offset < 0) @@ -1615,7 +1747,6 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (av_new_packet(pkt, pkt_size+offset) < 0) { av_free(pkt); res = AVERROR(ENOMEM); - n = laces-1; break; } if (offset) @@ -1629,7 +1760,10 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, pkt->flags = is_keyframe; pkt->stream_index = st->index; - pkt->pts = timecode; + if (track->ms_compat) + pkt->dts = timecode; + else + pkt->pts = timecode; pkt->pos = pos; if (st->codec->codec_id == CODEC_ID_TEXT) pkt->convergence_duration = duration; @@ -1642,10 +1776,11 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (matroska->prev_pkt && timecode != AV_NOPTS_VALUE && matroska->prev_pkt->pts == timecode && - matroska->prev_pkt->stream_index == st->index) + matroska->prev_pkt->stream_index == st->index && + st->codec->codec_id == CODEC_ID_SSA) matroska_merge_packets(matroska->prev_pkt, pkt); else { - dynarray_add(&matroska->packets, &matroska->num_packets, pkt); + dynarray_add(&matroska->packets,&matroska->num_packets,pkt); matroska->prev_pkt = pkt; } } @@ -1653,6 +1788,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (timecode != AV_NOPTS_VALUE) timecode = duration ? timecode + duration : AV_NOPTS_VALUE; data += lace_size[n]; + size -= lace_size[n]; } } @@ -1666,26 +1802,22 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska) EbmlList *blocks_list; MatroskaBlock *blocks; int i, res; - offset_t pos = url_ftell(matroska->ctx->pb); + int64_t pos = url_ftell(matroska->ctx->pb); matroska->prev_pkt = NULL; - if (matroska->has_cluster_id){ - /* For the first cluster we parse, its ID was already read as - part of matroska_read_header(), so don't read it again */ - res = ebml_parse_id(matroska, matroska_clusters, - MATROSKA_ID_CLUSTER, &cluster); + if (matroska->current_id) pos -= 4; /* sizeof the ID which was already read */ - matroska->has_cluster_id = 0; - } else - res = ebml_parse(matroska, matroska_clusters, &cluster); + res = ebml_parse(matroska, matroska_clusters, &cluster); blocks_list = &cluster.blocks; blocks = blocks_list->elem; for (i=0; inb_elem; i++) - if (blocks[i].bin.size > 0) + if (blocks[i].bin.size > 0 && blocks[i].bin.data) { + int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1; res=matroska_parse_block(matroska, blocks[i].bin.data, blocks[i].bin.size, blocks[i].bin.pos, cluster.timecode, - blocks[i].duration, !blocks[i].reference, + blocks[i].duration, is_keyframe, pos); + } ebml_free(matroska_cluster, &cluster); if (res < 0) matroska->done = 1; return res; @@ -1697,7 +1829,7 @@ static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt) while (matroska_deliver_packet(matroska, pkt)) { if (matroska->done) - return AVERROR(EIO); + return AVERROR_EOF; matroska_parse_cluster(matroska); } @@ -1775,4 +1907,5 @@ AVInputFormat matroska_demuxer = { matroska_read_packet, matroska_read_close, matroska_read_seek, + .metadata_conv = ff_mkv_metadata_conv, };