X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavformat%2Fmatroskadec.c;h=0d0285e6c0b9a9c4afadc7bcc5fa597521ea5cb1;hb=2a5db1aa588eb157c35e44ca2a76a1184aa5bcf4;hp=7f4f0c4063bab1814bb8011643d47883204921c4;hpb=68a71451f6c748ccb046e3c8a0487ca26f7ed4c3;p=ffmpeg diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 7f4f0c4063b..0d0285e6c0b 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -20,7 +20,7 @@ */ /** - * @file matroskadec.c + * @file * Matroska file demuxer * by Ronald Bultje * with a little help from Moritz Bunkus @@ -30,9 +30,11 @@ #include #include "avformat.h" -/* For codec_get_id(). */ +#include "internal.h" +/* For ff_codec_get_id(). */ #include "riff.h" #include "isom.h" +#include "rm.h" #include "matroska.h" #include "libavcodec/mpeg4audio.h" #include "libavutil/intfloat_readwrite.h" @@ -128,25 +130,32 @@ typedef struct { typedef struct { uint64_t num; + uint64_t uid; uint64_t type; + char *name; char *codec_id; EbmlBin codec_priv; char *language; double time_scale; uint64_t default_duration; uint64_t flag_default; + uint64_t flag_forced; MatroskaTrackVideo video; MatroskaTrackAudio audio; EbmlList encodings; AVStream *stream; int64_t end_timecode; + int ms_compat; } MatroskaTrack; typedef struct { + uint64_t uid; char *filename; char *mime; EbmlBin bin; + + AVStream *stream; } MatroskaAttachement; typedef struct { @@ -154,6 +163,8 @@ typedef struct { uint64_t end; uint64_t uid; char *title; + + AVChapter *chapter; } MatroskaChapter; typedef struct { @@ -169,9 +180,24 @@ typedef struct { typedef struct { char *name; char *string; + char *lang; + uint64_t def; EbmlList sub; } MatroskaTag; +typedef struct { + char *type; + uint64_t typevalue; + uint64_t trackuid; + uint64_t chapteruid; + uint64_t attachuid; +} MatroskaTagTarget; + +typedef struct { + MatroskaTagTarget target; + EbmlList tag; +} MatroskaTags; + typedef struct { uint64_t id; uint64_t pos; @@ -189,6 +215,7 @@ typedef struct { int num_levels; MatroskaLevel levels[EBML_MAX_DEPTH]; int level_up; + uint32_t current_id; uint64_t time_scale; double duration; @@ -209,7 +236,6 @@ typedef struct { AVPacket *prev_pkt; int done; - int has_cluster_id; /* What to skip before effectively reading a packet. */ int skip_to_keyframe; @@ -219,6 +245,7 @@ typedef struct { typedef struct { uint64_t duration; int64_t reference; + uint64_t non_simple; EbmlBin bin; } MatroskaBlock; @@ -301,6 +328,8 @@ static EbmlSyntax matroska_track_encodings[] = { static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKNUMBER, EBML_UINT, 0, offsetof(MatroskaTrack,num) }, + { MATROSKA_ID_TRACKNAME, EBML_UTF8, 0, offsetof(MatroskaTrack,name) }, + { MATROSKA_ID_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTrack,uid) }, { MATROSKA_ID_TRACKTYPE, EBML_UINT, 0, offsetof(MatroskaTrack,type) }, { MATROSKA_ID_CODECID, EBML_STR, 0, offsetof(MatroskaTrack,codec_id) }, { MATROSKA_ID_CODECPRIVATE, EBML_BIN, 0, offsetof(MatroskaTrack,codec_priv) }, @@ -308,13 +337,11 @@ static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) }, { MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} }, { MATROSKA_ID_TRACKFLAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} }, + { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} }, { MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} }, { MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} }, { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} }, - { MATROSKA_ID_TRACKUID, EBML_NONE }, - { MATROSKA_ID_TRACKNAME, EBML_NONE }, { MATROSKA_ID_TRACKFLAGENABLED, EBML_NONE }, - { MATROSKA_ID_TRACKFLAGFORCED, EBML_NONE }, { MATROSKA_ID_TRACKFLAGLACING, EBML_NONE }, { MATROSKA_ID_CODECNAME, EBML_NONE }, { MATROSKA_ID_CODECDECODEALL, EBML_NONE }, @@ -332,11 +359,11 @@ static EbmlSyntax matroska_tracks[] = { }; static EbmlSyntax matroska_attachment[] = { + { MATROSKA_ID_FILEUID, EBML_UINT, 0, offsetof(MatroskaAttachement,uid) }, { MATROSKA_ID_FILENAME, EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) }, { MATROSKA_ID_FILEMIMETYPE, EBML_STR, 0, offsetof(MatroskaAttachement,mime) }, { MATROSKA_ID_FILEDATA, EBML_BIN, 0, offsetof(MatroskaAttachement,bin) }, { MATROSKA_ID_FILEDESC, EBML_NONE }, - { MATROSKA_ID_FILEUID, EBML_NONE }, { 0 } }; @@ -398,20 +425,30 @@ static EbmlSyntax matroska_index[] = { static EbmlSyntax matroska_simpletag[] = { { MATROSKA_ID_TAGNAME, EBML_UTF8, 0, offsetof(MatroskaTag,name) }, { MATROSKA_ID_TAGSTRING, EBML_UTF8, 0, offsetof(MatroskaTag,string) }, + { MATROSKA_ID_TAGLANG, EBML_STR, 0, offsetof(MatroskaTag,lang), {.s="und"} }, + { MATROSKA_ID_TAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTag,def) }, + { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0, offsetof(MatroskaTag,def) }, { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} }, - { MATROSKA_ID_TAGLANG, EBML_NONE }, - { MATROSKA_ID_TAGDEFAULT, EBML_NONE }, + { 0 } +}; + +static EbmlSyntax matroska_tagtargets[] = { + { MATROSKA_ID_TAGTARGETS_TYPE, EBML_STR, 0, offsetof(MatroskaTagTarget,type) }, + { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} }, + { MATROSKA_ID_TAGTARGETS_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) }, + { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) }, + { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) }, { 0 } }; static EbmlSyntax matroska_tag[] = { - { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), 0, {.n=matroska_simpletag} }, - { MATROSKA_ID_TAGTARGETS, EBML_NONE }, + { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} }, + { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} }, { 0 } }; static EbmlSyntax matroska_tags[] = { - { MATROSKA_ID_TAG, EBML_NEST, 0, offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} }, + { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} }, { 0 } }; @@ -434,7 +471,7 @@ static EbmlSyntax matroska_segment[] = { { MATROSKA_ID_CUES, EBML_NEST, 0, 0, {.n=matroska_index } }, { MATROSKA_ID_TAGS, EBML_NEST, 0, 0, {.n=matroska_tags } }, { MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, {.n=matroska_seekhead } }, - { MATROSKA_ID_CLUSTER, EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) }, + { MATROSKA_ID_CLUSTER, EBML_STOP }, { 0 } }; @@ -448,6 +485,7 @@ static EbmlSyntax matroska_blockgroup[] = { { MATROSKA_ID_SIMPLEBLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) }, { MATROSKA_ID_BLOCKDURATION, EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} }, { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) }, + { 1, EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} }, { 0 } }; @@ -469,24 +507,7 @@ static EbmlSyntax matroska_clusters[] = { { 0 } }; -#define SIZE_OFF(x) sizeof(((AVFormatContext*)0)->x),offsetof(AVFormatContext,x) -const struct { - const char name[16]; - int size; - int offset; -} metadata[] = { - { "TITLE", SIZE_OFF(title) }, - { "ARTIST", SIZE_OFF(author) }, - { "WRITTEN_BY", SIZE_OFF(author) }, - { "LEAD_PERFORMER", SIZE_OFF(author) }, - { "COPYRIGHT", SIZE_OFF(copyright) }, - { "COMMENT", SIZE_OFF(comment) }, - { "ALBUM", SIZE_OFF(album) }, - { "DATE_WRITTEN", SIZE_OFF(year) }, - { "DATE_RELEASED", SIZE_OFF(year) }, - { "PART_NUMBER", SIZE_OFF(track) }, - { "GENRE", SIZE_OFF(genre) }, -}; +static const char *matroska_doctypes[] = { "matroska", "webm" }; /* * Return: Whether we reached the end of a level in the hierarchy or not. @@ -498,7 +519,7 @@ static int ebml_level_end(MatroskaDemuxContext *matroska) if (matroska->num_levels > 0) { MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1]; - if (pos - level->start >= level->length) { + if (pos - level->start >= level->length || matroska->current_id) { matroska->num_levels--; return 1; } @@ -517,8 +538,8 @@ static int ebml_level_end(MatroskaDemuxContext *matroska) static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, int max_size, uint64_t *number) { - int len_mask = 0x80, read = 1, n = 1; - int64_t total = 0; + int read = 1, n = 1; + uint64_t total = 0; /* The first byte tells us the length in bytes - get_byte() can normally * return 0, but since that's not a valid first ebmlID byte, we can @@ -535,10 +556,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, } /* get the length of the EBML number */ - while (read <= max_size && !(total & len_mask)) { - read++; - len_mask >>= 1; - } + read = 8 - ff_log2_tab[total]; if (read > max_size) { int64_t pos = url_ftell(pb) - 1; av_log(matroska->ctx, AV_LOG_ERROR, @@ -548,7 +566,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, } /* read out length */ - total &= ~len_mask; + total ^= 1 << ff_log2_tab[total]; while (n++ < read) total = (total << 8) | get_byte(pb); @@ -557,6 +575,20 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb, return read; } +/** + * Read a EBML length value. + * This needs special handling for the "unknown length" case which has multiple + * encodings. + */ +static int ebml_read_length(MatroskaDemuxContext *matroska, ByteIOContext *pb, + uint64_t *number) +{ + int res = ebml_read_num(matroska, pb, 8, number); + if (res > 0 && *number + 1 == 1ULL << (7 * res)) + *number = 0xffffffffffffffULL; + return res; +} + /* * Read the next element as an unsigned int. * 0 is success, < 0 is failure. @@ -565,7 +597,7 @@ static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num) { int n = 0; - if (size < 1 || size > 8) + if (size > 8) return AVERROR_INVALIDDATA; /* big-endian ordering; build up number */ @@ -582,7 +614,9 @@ static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num) */ static int ebml_read_float(ByteIOContext *pb, int size, double *num) { - if (size == 4) { + if (size == 0) { + *num = 0; + } else if (size == 4) { *num= av_int2flt(get_be32(pb)); } else if(size==8){ *num= av_int2dbl(get_be64(pb)); @@ -604,7 +638,7 @@ static int ebml_read_ascii(ByteIOContext *pb, int size, char **str) if (!(*str = av_malloc(size + 1))) return AVERROR(ENOMEM); if (get_buffer(pb, (uint8_t *) *str, size) != size) { - av_free(*str); + av_freep(str); return AVERROR(EIO); } (*str)[size] = '\0'; @@ -624,8 +658,10 @@ static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin) bin->size = length; bin->pos = url_ftell(pb); - if (get_buffer(pb, bin->data, length) != length) + if (get_buffer(pb, bin->data, length) != length) { + av_freep(&bin->data); return AVERROR(EIO); + } return 0; } @@ -635,7 +671,7 @@ static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin) * are supposed to be sub-elements which can be read separately. * 0 is success, < 0 is failure. */ -static int ebml_read_master(MatroskaDemuxContext *matroska, int length) +static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length) { ByteIOContext *pb = matroska->ctx->pb; MatroskaLevel *level; @@ -662,7 +698,7 @@ static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska, { ByteIOContext pb; init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL); - return ebml_read_num(matroska, &pb, 8, num); + return ebml_read_num(matroska, &pb, FFMIN(size, 8), num); } /* @@ -694,6 +730,10 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, for (i=0; syntax[i].id; i++) if (id == syntax[i].id) break; + if (!syntax[i].id && id == MATROSKA_ID_CLUSTER && + matroska->num_levels > 0 && + matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff) + return 0; // we reached the end of an unknown size cluster if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id); return ebml_parse_elem(matroska, &syntax[i], data); @@ -702,10 +742,14 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, void *data) { - uint64_t id; - int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id); - id |= 1 << 7*res; - return res < 0 ? res : ebml_parse_id(matroska, syntax, id, data); + if (!matroska->current_id) { + uint64_t id; + int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id); + if (res < 0) + return res; + matroska->current_id = id | 1 << 7*res; + } + return ebml_parse_id(matroska, syntax, matroska->current_id, data); } static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, @@ -750,9 +794,11 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, list->nb_elem++; } - if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) - if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0) + if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) { + matroska->current_id = 0; + if ((res = ebml_read_length(matroska, pb, &length)) < 0) return res; + } switch (syntax->type) { case EBML_UINT: res = ebml_read_uint (pb, length, data); break; @@ -766,7 +812,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, matroska->segment_start = url_ftell(matroska->ctx->pb); return ebml_parse_nest(matroska, syntax->def.n, data); case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data); - case EBML_STOP: *(int *)data = 1; return 1; + case EBML_STOP: return 1; default: return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0; } if (res == AVERROR_INVALIDDATA) @@ -806,8 +852,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data) static int matroska_probe(AVProbeData *p) { uint64_t total = 0; - int len_mask = 0x80, size = 1, n = 1; - static const char probe_data[] = "matroska"; + int len_mask = 0x80, size = 1, n = 1, i; /* EBML header? */ if (AV_RB32(p->buf) != EBML_ID_HEADER) @@ -829,15 +874,19 @@ static int matroska_probe(AVProbeData *p) if (p->buf_size < 4 + size + total) return 0; - /* The header must contain the document type 'matroska'. For now, + /* The header should contain a known document type. For now, * we don't parse the whole header but simply check for the * availability of that array of characters inside the header. * Not fully fool-proof, but good enough. */ - for (n = 4+size; n <= 4+size+total-(sizeof(probe_data)-1); n++) - if (!memcmp(p->buf+n, probe_data, sizeof(probe_data)-1)) - return AVPROBE_SCORE_MAX; + for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) { + int probelen = strlen(matroska_doctypes[i]); + for (n = 4+size; n <= 4+size+total-probelen; n++) + if (!memcmp(p->buf+n, matroska_doctypes[i], probelen)) + return AVPROBE_SCORE_MAX; + } - return 0; + // probably valid EBML header but no recognized doctype + return AVPROBE_SCORE_MAX/2; } static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska, @@ -865,16 +914,18 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size, int result = 0; int olen; + if (pkt_size >= 10000000) + return -1; + switch (encodings[0].compression.algo) { case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP: return encodings[0].compression.settings.size; case MATROSKA_TRACK_ENCODING_COMP_LZO: do { olen = pkt_size *= 3; - pkt_data = av_realloc(pkt_data, - pkt_size+LZO_OUTPUT_PADDING); - result = lzo1x_decode(pkt_data, &olen, data, &isize); - } while (result==LZO_OUTPUT_FULL && pkt_size<10000000); + pkt_data = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING); + result = av_lzo1x_decode(pkt_data, &olen, data, &isize); + } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000); if (result) goto failed; pkt_size -= olen; @@ -973,24 +1024,61 @@ static void matroska_merge_packets(AVPacket *out, AVPacket *in) av_free(in); } -static void matroska_convert_tags(AVFormatContext *s, EbmlList *list) +static void matroska_convert_tag(AVFormatContext *s, EbmlList *list, + AVMetadata **metadata, char *prefix) { MatroskaTag *tags = list->elem; - int i, j; + char key[1024]; + int i; for (i=0; i < list->nb_elem; i++) { - for (j=0; j < FF_ARRAY_ELEMS(metadata); j++){ - if (!strcmp(tags[i].name, metadata[j].name)) { - int *ptr = (int *)((char *)s + metadata[j].offset); - if (*ptr) continue; - if (metadata[j].size > sizeof(int)) - av_strlcpy((char *)ptr, tags[i].string, metadata[j].size); - else - *ptr = atoi(tags[i].string); - } - } + const char *lang = strcmp(tags[i].lang, "und") ? tags[i].lang : NULL; + if (prefix) snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name); + else av_strlcpy(key, tags[i].name, sizeof(key)); + if (tags[i].def || !lang) { + av_metadata_set2(metadata, key, tags[i].string, 0); if (tags[i].sub.nb_elem) - matroska_convert_tags(s, &tags[i].sub); + matroska_convert_tag(s, &tags[i].sub, metadata, key); + } + if (lang) { + av_strlcat(key, "-", sizeof(key)); + av_strlcat(key, lang, sizeof(key)); + av_metadata_set2(metadata, key, tags[i].string, 0); + if (tags[i].sub.nb_elem) + matroska_convert_tag(s, &tags[i].sub, metadata, key); + } + } +} + +static void matroska_convert_tags(AVFormatContext *s) +{ + MatroskaDemuxContext *matroska = s->priv_data; + MatroskaTags *tags = matroska->tags.elem; + int i, j; + + for (i=0; i < matroska->tags.nb_elem; i++) { + if (tags[i].target.attachuid) { + MatroskaAttachement *attachment = matroska->attachments.elem; + for (j=0; jattachments.nb_elem; j++) + if (attachment[j].uid == tags[i].target.attachuid) + matroska_convert_tag(s, &tags[i].tag, + &attachment[j].stream->metadata, NULL); + } else if (tags[i].target.chapteruid) { + MatroskaChapter *chapter = matroska->chapters.elem; + for (j=0; jchapters.nb_elem; j++) + if (chapter[j].uid == tags[i].target.chapteruid) + matroska_convert_tag(s, &tags[i].tag, + &chapter[j].chapter->metadata, NULL); + } else if (tags[i].target.trackuid) { + MatroskaTrack *track = matroska->tracks.elem; + for (j=0; jtracks.nb_elem; j++) + if (track[j].uid == tags[i].target.trackuid) + matroska_convert_tag(s, &tags[i].tag, + &track[j].stream->metadata, NULL); + } else { + matroska_convert_tag(s, &tags[i].tag, &s->metadata, + tags[i].target.type); + } } } @@ -1000,9 +1088,15 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) MatroskaSeekhead *seekhead = seekhead_list->elem; uint32_t level_up = matroska->level_up; int64_t before_pos = url_ftell(matroska->ctx->pb); + uint32_t saved_id = matroska->current_id; MatroskaLevel level; int i; + // we should not do any seeking in the streaming case + if (url_is_streamed(matroska->ctx->pb) || + (matroska->ctx->flags & AVFMT_FLAG_IGNIDX)) + return; + for (i=0; inb_elem; i++) { int64_t offset = seekhead[i].pos + matroska->segment_start; @@ -1028,6 +1122,7 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) level.length = (uint64_t)-1; matroska->levels[matroska->num_levels] = level; matroska->num_levels++; + matroska->current_id = 0; ebml_parse(matroska, matroska_segment, matroska); @@ -1042,6 +1137,7 @@ static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) /* seek back */ url_fseek(matroska->ctx->pb, before_pos, SEEK_SET); matroska->level_up = level_up; + matroska->current_id = saved_id; } static int matroska_aac_profile(char *codec_id) @@ -1079,35 +1175,40 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) uint64_t max_start = 0; Ebml ebml = { 0 }; AVStream *st; - int i, j; + int i, j, res; matroska->ctx = s; /* First read the EBML header. */ if (ebml_parse(matroska, ebml_syntax, &ebml) || ebml.version > EBML_VERSION || ebml.max_size > sizeof(uint64_t) - || ebml.id_length > sizeof(uint32_t) || strcmp(ebml.doctype, "matroska") - || ebml.doctype_version > 2) { + || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) { av_log(matroska->ctx, AV_LOG_ERROR, "EBML header using unsupported features\n" "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n", ebml.version, ebml.doctype, ebml.doctype_version); - return AVERROR_NOFMT; + ebml_free(ebml_syntax, &ebml); + return AVERROR_PATCHWELCOME; + } + for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) + if (!strcmp(ebml.doctype, matroska_doctypes[i])) + break; + if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) { + av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype); } ebml_free(ebml_syntax, &ebml); /* The next thing is a segment. */ - if (ebml_parse(matroska, matroska_segments, matroska) < 0) - return -1; + if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0) + return res; matroska_execute_seekhead(matroska); + if (!matroska->time_scale) + matroska->time_scale = 1000000; if (matroska->duration) matroska->ctx->duration = matroska->duration * matroska->time_scale * 1000 / AV_TIME_BASE; - if (matroska->title) - strncpy(matroska->ctx->title, matroska->title, - sizeof(matroska->ctx->title)-1); - matroska_convert_tags(s, &matroska->tags); + av_metadata_set2(&s->metadata, "title", matroska->title, 0); tracks = matroska->tracks.elem; for (i=0; i < matroska->tracks.nb_elem; i++) { @@ -1118,6 +1219,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) uint8_t *extradata = NULL; int extradata_size = 0; int extradata_offset = 0; + ByteIOContext b; /* Apply some sanity checks. */ if (track->type != MATROSKA_TRACK_TYPE_VIDEO && @@ -1196,23 +1298,23 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC") && track->codec_priv.size >= 40 && track->codec_priv.data != NULL) { + track->ms_compat = 1; track->video.fourcc = AV_RL32(track->codec_priv.data + 16); - codec_id = codec_get_id(codec_bmp_tags, track->video.fourcc); + codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc); + extradata_offset = 40; } else if (!strcmp(track->codec_id, "A_MS/ACM") - && track->codec_priv.size >= 18 + && track->codec_priv.size >= 14 && track->codec_priv.data != NULL) { - ByteIOContext b; init_put_byte(&b, track->codec_priv.data, track->codec_priv.size, URL_RDONLY, NULL, NULL, NULL, NULL); - get_wav_header(&b, st->codec, track->codec_priv.size); + ff_get_wav_header(&b, st->codec, track->codec_priv.size); codec_id = st->codec->codec_id; - extradata_offset = 18; - track->codec_priv.size -= extradata_offset; + extradata_offset = FFMIN(track->codec_priv.size, 18); } else if (!strcmp(track->codec_id, "V_QUICKTIME") && (track->codec_priv.size >= 86) && (track->codec_priv.data != NULL)) { track->video.fourcc = AV_RL32(track->codec_priv.data); - codec_id=codec_get_id(codec_movvideo_tags, track->video.fourcc); + codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc); } else if (codec_id == CODEC_ID_PCM_S16BE) { switch (track->audio.bitdepth) { case 8: codec_id = CODEC_ID_PCM_U8; break; @@ -1244,7 +1346,6 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else extradata_size = 2; } else if (codec_id == CODEC_ID_TTA) { - ByteIOContext b; extradata_size = 30; extradata = av_mallocz(extradata_size); if (extradata == NULL) @@ -1260,17 +1361,16 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 || codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) { extradata_offset = 26; - track->codec_priv.size -= extradata_offset; } else if (codec_id == CODEC_ID_RA_144) { track->audio.out_samplerate = 8000; track->audio.channels = 1; } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK || - codec_id == CODEC_ID_ATRAC3) { - ByteIOContext b; - + codec_id == CODEC_ID_ATRAC3 || codec_id == CODEC_ID_SIPR) { + int flavor; init_put_byte(&b, track->codec_priv.data,track->codec_priv.size, 0, NULL, NULL, NULL, NULL); - url_fskip(&b, 24); + url_fskip(&b, 22); + flavor = get_be16(&b); track->audio.coded_framesize = get_be32(&b); url_fskip(&b, 12); track->audio.sub_packet_h = get_be16(&b); @@ -1281,11 +1381,16 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->block_align = track->audio.coded_framesize; track->codec_priv.size = 0; } else { + if (codec_id == CODEC_ID_SIPR && flavor < 4) { + const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 }; + track->audio.sub_packet_size = ff_sipr_subpk_size[flavor]; + st->codec->bit_rate = sipr_bit_rate[flavor]; + } st->codec->block_align = track->audio.sub_packet_size; extradata_offset = 78; - track->codec_priv.size -= extradata_offset; } } + track->codec_priv.size -= extradata_offset; if (codec_id == CODEC_ID_NONE) av_log(matroska->ctx, AV_LOG_INFO, @@ -1298,31 +1403,36 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->codec_id = codec_id; st->start_time = 0; if (strcmp(track->language, "und")) - av_strlcpy(st->language, track->language, 4); + av_metadata_set2(&st->metadata, "language", track->language, 0); + av_metadata_set2(&st->metadata, "title", track->name, 0); if (track->flag_default) st->disposition |= AV_DISPOSITION_DEFAULT; + if (track->flag_forced) + st->disposition |= AV_DISPOSITION_FORCED; if (track->default_duration) av_reduce(&st->codec->time_base.num, &st->codec->time_base.den, track->default_duration, 1000000000, 30000); - if(extradata){ - st->codec->extradata = extradata; - st->codec->extradata_size = extradata_size; - } else if(track->codec_priv.data && track->codec_priv.size > 0){ - st->codec->extradata = av_mallocz(track->codec_priv.size + - FF_INPUT_BUFFER_PADDING_SIZE); - if(st->codec->extradata == NULL) - return AVERROR(ENOMEM); - st->codec->extradata_size = track->codec_priv.size; - memcpy(st->codec->extradata, - track->codec_priv.data + extradata_offset, - track->codec_priv.size); + if (!st->codec->extradata) { + if(extradata){ + st->codec->extradata = extradata; + st->codec->extradata_size = extradata_size; + } else if(track->codec_priv.data && track->codec_priv.size > 0){ + st->codec->extradata = av_mallocz(track->codec_priv.size + + FF_INPUT_BUFFER_PADDING_SIZE); + if(st->codec->extradata == NULL) + return AVERROR(ENOMEM); + st->codec->extradata_size = track->codec_priv.size; + memcpy(st->codec->extradata, + track->codec_priv.data + extradata_offset, + track->codec_priv.size); + } } if (track->type == MATROSKA_TRACK_TYPE_VIDEO) { - st->codec->codec_type = CODEC_TYPE_VIDEO; + st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_tag = track->video.fourcc; st->codec->width = track->video.pixel_width; st->codec->height = track->video.pixel_height; @@ -1331,13 +1441,18 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->height * track->video.display_width, st->codec-> width * track->video.display_height, 255); + if (st->codec->codec_id != CODEC_ID_H264) st->need_parsing = AVSTREAM_PARSE_HEADERS; + if (track->default_duration) + st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX); } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) { - st->codec->codec_type = CODEC_TYPE_AUDIO; + st->codec->codec_type = AVMEDIA_TYPE_AUDIO; st->codec->sample_rate = track->audio.out_samplerate; st->codec->channels = track->audio.channels; + if (st->codec->codec_id != CODEC_ID_AAC) + st->need_parsing = AVSTREAM_PARSE_HEADERS; } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) { - st->codec->codec_type = CODEC_TYPE_SUBTITLE; + st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; } } @@ -1350,9 +1465,9 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) AVStream *st = av_new_stream(s, 0); if (st == NULL) break; - st->filename = av_strdup(attachements[j].filename); + av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0); st->codec->codec_id = CODEC_ID_NONE; - st->codec->codec_type = CODEC_TYPE_ATTACHMENT; + st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT; st->codec->extradata = av_malloc(attachements[j].bin.size); if(st->codec->extradata == NULL) break; @@ -1366,6 +1481,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) break; } } + attachements[j].stream = st; } } @@ -1373,9 +1489,12 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) for (i=0; inb_elem; i++) if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid && (max_start==0 || chapters[i].start > max_start)) { + chapters[i].chapter = ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000}, chapters[i].start, chapters[i].end, chapters[i].title); + av_metadata_set2(&chapters[i].chapter->metadata, + "title", chapters[i].title, 0); max_start = chapters[i].start; } @@ -1400,6 +1519,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } } + matroska_convert_tags(s); + return 0; } @@ -1484,7 +1605,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, flags = *data++; size -= 3; if (is_keyframe == -1) - is_keyframe = flags & 0x80 ? PKT_FLAG_KEY : 0; + is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0; if (cluster_time != (uint64_t)-1 && (block_time >= 0 || cluster_time >= -block_time)) { @@ -1581,9 +1702,11 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (res == 0) { for (n = 0; n < laces; n++) { - if (st->codec->codec_id == CODEC_ID_RA_288 || - st->codec->codec_id == CODEC_ID_COOK || - st->codec->codec_id == CODEC_ID_ATRAC3) { + if ((st->codec->codec_id == CODEC_ID_RA_288 || + st->codec->codec_id == CODEC_ID_COOK || + st->codec->codec_id == CODEC_ID_SIPR || + st->codec->codec_id == CODEC_ID_ATRAC3) && + st->codec->block_align && track->audio.sub_packet_size) { int a = st->codec->block_align; int sps = track->audio.sub_packet_size; int cfs = track->audio.coded_framesize; @@ -1597,11 +1720,15 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, for (x=0; xaudio.buf+x*2*w+y*cfs, data+x*cfs, cfs); + else if (st->codec->codec_id == CODEC_ID_SIPR) + memcpy(track->audio.buf + y*w, data, w); else for (x=0; xaudio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps); if (++track->audio.sub_packet_cnt >= h) { + if (st->codec->codec_id == CODEC_ID_SIPR) + ff_rm_reorder_sipr_data(track->audio.buf, h, w); track->audio.sub_packet_cnt = 0; track->audio.pkt_cnt = h*w / a; } @@ -1620,6 +1747,11 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, int offset = 0, pkt_size = lace_size[n]; uint8_t *pkt_data = data; + if (pkt_size > size) { + av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n"); + break; + } + if (encodings && encodings->scope & 1) { offset = matroska_decode_buffer(&pkt_data,&pkt_size, track); if (offset < 0) @@ -1631,7 +1763,6 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (av_new_packet(pkt, pkt_size+offset) < 0) { av_free(pkt); res = AVERROR(ENOMEM); - n = laces-1; break; } if (offset) @@ -1645,7 +1776,10 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, pkt->flags = is_keyframe; pkt->stream_index = st->index; - pkt->pts = timecode; + if (track->ms_compat) + pkt->dts = timecode; + else + pkt->pts = timecode; pkt->pos = pos; if (st->codec->codec_id == CODEC_ID_TEXT) pkt->convergence_duration = duration; @@ -1658,7 +1792,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (matroska->prev_pkt && timecode != AV_NOPTS_VALUE && matroska->prev_pkt->pts == timecode && - matroska->prev_pkt->stream_index == st->index) + matroska->prev_pkt->stream_index == st->index && + st->codec->codec_id == CODEC_ID_SSA) matroska_merge_packets(matroska->prev_pkt, pkt); else { dynarray_add(&matroska->packets,&matroska->num_packets,pkt); @@ -1669,6 +1804,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, if (timecode != AV_NOPTS_VALUE) timecode = duration ? timecode + duration : AV_NOPTS_VALUE; data += lace_size[n]; + size -= lace_size[n]; } } @@ -1684,24 +1820,20 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska) int i, res; int64_t pos = url_ftell(matroska->ctx->pb); matroska->prev_pkt = NULL; - if (matroska->has_cluster_id){ - /* For the first cluster we parse, its ID was already read as - part of matroska_read_header(), so don't read it again */ - res = ebml_parse_id(matroska, matroska_clusters, - MATROSKA_ID_CLUSTER, &cluster); + if (matroska->current_id) pos -= 4; /* sizeof the ID which was already read */ - matroska->has_cluster_id = 0; - } else - res = ebml_parse(matroska, matroska_clusters, &cluster); + res = ebml_parse(matroska, matroska_clusters, &cluster); blocks_list = &cluster.blocks; blocks = blocks_list->elem; for (i=0; inb_elem; i++) - if (blocks[i].bin.size > 0) + if (blocks[i].bin.size > 0 && blocks[i].bin.data) { + int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1; res=matroska_parse_block(matroska, blocks[i].bin.data, blocks[i].bin.size, blocks[i].bin.pos, cluster.timecode, - blocks[i].duration, !blocks[i].reference, + blocks[i].duration, is_keyframe, pos); + } ebml_free(matroska_cluster, &cluster); if (res < 0) matroska->done = 1; return res; @@ -1713,7 +1845,7 @@ static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt) while (matroska_deliver_packet(matroska, pkt)) { if (matroska->done) - return AVERROR(EIO); + return AVERROR_EOF; matroska_parse_cluster(matroska); } @@ -1783,12 +1915,13 @@ static int matroska_read_close(AVFormatContext *s) } AVInputFormat matroska_demuxer = { - "matroska", - NULL_IF_CONFIG_SMALL("Matroska file format"), + "matroska,webm", + NULL_IF_CONFIG_SMALL("Matroska/WebM file format"), sizeof(MatroskaDemuxContext), matroska_probe, matroska_read_header, matroska_read_packet, matroska_read_close, matroska_read_seek, + .metadata_conv = ff_mkv_metadata_conv, };