double time_scale;
uint64_t default_duration;
uint64_t flag_default;
+ uint64_t flag_forced;
MatroskaTrackVideo video;
MatroskaTrackAudio audio;
EbmlList encodings;
int num_levels;
MatroskaLevel levels[EBML_MAX_DEPTH];
int level_up;
+ uint32_t current_id;
uint64_t time_scale;
double duration;
AVPacket *prev_pkt;
int done;
- int has_cluster_id;
/* What to skip before effectively reading a packet. */
int skip_to_keyframe;
{ MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
{ MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
{ MATROSKA_ID_TRACKFLAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
+ { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
{ MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
{ MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
{ MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
{ MATROSKA_ID_TRACKFLAGENABLED, EBML_NONE },
- { MATROSKA_ID_TRACKFLAGFORCED, EBML_NONE },
{ MATROSKA_ID_TRACKFLAGLACING, EBML_NONE },
{ MATROSKA_ID_CODECNAME, EBML_NONE },
{ MATROSKA_ID_CODECDECODEALL, EBML_NONE },
{ MATROSKA_ID_TAGSTRING, EBML_UTF8, 0, offsetof(MatroskaTag,string) },
{ MATROSKA_ID_TAGLANG, EBML_STR, 0, offsetof(MatroskaTag,lang), {.s="und"} },
{ MATROSKA_ID_TAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTag,def) },
+ { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0, offsetof(MatroskaTag,def) },
{ MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
{ 0 }
};
{ MATROSKA_ID_CUES, EBML_NEST, 0, 0, {.n=matroska_index } },
{ MATROSKA_ID_TAGS, EBML_NEST, 0, 0, {.n=matroska_tags } },
{ MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, {.n=matroska_seekhead } },
- { MATROSKA_ID_CLUSTER, EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) },
+ { MATROSKA_ID_CLUSTER, EBML_STOP },
{ 0 }
};
if (matroska->num_levels > 0) {
MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
- if (pos - level->start >= level->length) {
+ if (pos - level->start >= level->length || matroska->current_id) {
matroska->num_levels--;
return 1;
}
static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb,
int max_size, uint64_t *number)
{
- int len_mask = 0x80, read = 1, n = 1;
- int64_t total = 0;
+ int read = 1, n = 1;
+ uint64_t total = 0;
/* The first byte tells us the length in bytes - get_byte() can normally
* return 0, but since that's not a valid first ebmlID byte, we can
}
/* get the length of the EBML number */
- while (read <= max_size && !(total & len_mask)) {
- read++;
- len_mask >>= 1;
- }
+ read = 8 - ff_log2_tab[total];
if (read > max_size) {
int64_t pos = url_ftell(pb) - 1;
av_log(matroska->ctx, AV_LOG_ERROR,
}
/* read out length */
- total &= ~len_mask;
+ total ^= 1 << ff_log2_tab[total];
while (n++ < read)
total = (total << 8) | get_byte(pb);
return read;
}
+/**
+ * Read an EBML length value.
+ * This needs special handling for the "unknown length" case which has multiple
+ * encodings.
+ *
+ * The value is read with ebml_read_num() using a maximum size of 8 bytes.
+ * If that call returned a positive result res and the decoded value equals
+ * (1ULL << (7 * res)) - 1, i.e. all 7 * res value bits are set, *number is
+ * replaced by the single constant 0xffffffffffffff, so every width of the
+ * "unknown length" encoding ends up as the same value.
+ *
+ * @param matroska demuxer context, passed through to ebml_read_num()
+ * @param pb       I/O context to read from
+ * @param number   receives the decoded (and possibly normalized) length
+ * @return the result of ebml_read_num(), unchanged
+ */
+static int ebml_read_length(MatroskaDemuxContext *matroska, ByteIOContext *pb,
+ uint64_t *number)
+{
+ int res = ebml_read_num(matroska, pb, 8, number);
+ if (res > 0 && *number + 1 == 1ULL << (7 * res))
+ *number = 0xffffffffffffffULL;
+ return res;
+}
+
/*
* Read the next element as an unsigned int.
* 0 is success, < 0 is failure.
{
int n = 0;
- if (size < 1 || size > 8)
+ if (size > 8)
return AVERROR_INVALIDDATA;
/* big-endian ordering; build up number */
*/
static int ebml_read_float(ByteIOContext *pb, int size, double *num)
{
- if (size == 4) {
+ if (size == 0) {
+ *num = 0;
+ } else if (size == 4) {
*num= av_int2flt(get_be32(pb));
} else if(size==8){
*num= av_int2dbl(get_be64(pb));
* are supposed to be sub-elements which can be read separately.
* 0 is success, < 0 is failure.
*/
-static int ebml_read_master(MatroskaDemuxContext *matroska, int length)
+static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
{
ByteIOContext *pb = matroska->ctx->pb;
MatroskaLevel *level;
for (i=0; syntax[i].id; i++)
if (id == syntax[i].id)
break;
+ if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
+ matroska->num_levels > 0 &&
+ matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
+ return 0; // we reached the end of an unknown size cluster
if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32)
av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
return ebml_parse_elem(matroska, &syntax[i], data);
static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
void *data)
{
- uint64_t id;
- int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
- id |= 1 << 7*res;
- return res < 0 ? res : ebml_parse_id(matroska, syntax, id, data);
+ if (!matroska->current_id) { /* no element ID pending: read a new one (at most 4 bytes) */
+ uint64_t id;
+ int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
+ if (res < 0) /* bail out before res is used as a shift count below */
+ return res;
+ matroska->current_id = id | 1 << 7*res; /* put back the length-marker bit at position 7*res that ebml_read_num() cleared */
+ }
+ return ebml_parse_id(matroska, syntax, matroska->current_id, data); /* dispatch on the pending ID, whether just read or left over from an earlier call */
}
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
list->nb_elem++;
}
- if (syntax->type != EBML_PASS && syntax->type != EBML_STOP)
- if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0)
+ if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
+ matroska->current_id = 0;
+ if ((res = ebml_read_length(matroska, pb, &length)) < 0)
return res;
+ }
switch (syntax->type) {
case EBML_UINT: res = ebml_read_uint (pb, length, data); break;
matroska->segment_start = url_ftell(matroska->ctx->pb);
return ebml_parse_nest(matroska, syntax->def.n, data);
case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data);
- case EBML_STOP: *(int *)data = 1; return 1;
+ case EBML_STOP: return 1;
default: return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0;
}
if (res == AVERROR_INVALIDDATA)
return AVPROBE_SCORE_MAX;
}
- return 0;
+ // probably valid EBML header but no recognized doctype
+ return AVPROBE_SCORE_MAX/2;
}
static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
int result = 0;
int olen;
+ if (pkt_size >= 10000000)
+ return -1;
+
switch (encodings[0].compression.algo) {
case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
return encodings[0].compression.settings.size;
matroska_convert_tag(s, &tags[i].sub, metadata, key);
}
}
+ ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
}
static void matroska_convert_tags(AVFormatContext *s)
MatroskaSeekhead *seekhead = seekhead_list->elem;
uint32_t level_up = matroska->level_up;
int64_t before_pos = url_ftell(matroska->ctx->pb);
+ uint32_t saved_id = matroska->current_id;
MatroskaLevel level;
int i;
+ // we should not do any seeking in the streaming case
+ if (url_is_streamed(matroska->ctx->pb) ||
+ (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
+ return;
+
for (i=0; i<seekhead_list->nb_elem; i++) {
int64_t offset = seekhead[i].pos + matroska->segment_start;
level.length = (uint64_t)-1;
matroska->levels[matroska->num_levels] = level;
matroska->num_levels++;
+ matroska->current_id = 0;
ebml_parse(matroska, matroska_segment, matroska);
/* seek back */
url_fseek(matroska->ctx->pb, before_pos, SEEK_SET);
matroska->level_up = level_up;
+ matroska->current_id = saved_id;
}
static int matroska_aac_profile(char *codec_id)
uint64_t max_start = 0;
Ebml ebml = { 0 };
AVStream *st;
- int i, j;
+ int i, j, res;
matroska->ctx = s;
if (!strcmp(ebml.doctype, matroska_doctypes[i]))
break;
if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
- av_log(s, AV_LOG_ERROR, "Unknown EBML doctype '%s'\n", ebml.doctype);
- ebml_free(ebml_syntax, &ebml);
- return AVERROR_PATCHWELCOME;
+ av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
}
- av_metadata_set2(&s->metadata, "doctype", ebml.doctype, 0);
ebml_free(ebml_syntax, &ebml);
/* The next thing is a segment. */
- if (ebml_parse(matroska, matroska_segments, matroska) < 0)
- return -1;
+ if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0)
+ return res;
matroska_execute_seekhead(matroska);
if (!matroska->time_scale)
if (track->flag_default)
st->disposition |= AV_DISPOSITION_DEFAULT;
+ if (track->flag_forced)
+ st->disposition |= AV_DISPOSITION_FORCED;
if (track->default_duration)
av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
int offset = 0, pkt_size = lace_size[n];
uint8_t *pkt_data = data;
- if (lace_size[n] > size) {
+ if (pkt_size > size) {
av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n");
break;
}
if (matroska->prev_pkt &&
timecode != AV_NOPTS_VALUE &&
matroska->prev_pkt->pts == timecode &&
- matroska->prev_pkt->stream_index == st->index)
+ matroska->prev_pkt->stream_index == st->index &&
+ st->codec->codec_id == CODEC_ID_SSA)
matroska_merge_packets(matroska->prev_pkt, pkt);
else {
dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
int i, res;
int64_t pos = url_ftell(matroska->ctx->pb);
matroska->prev_pkt = NULL;
- if (matroska->has_cluster_id){
- /* For the first cluster we parse, its ID was already read as
- part of matroska_read_header(), so don't read it again */
- res = ebml_parse_id(matroska, matroska_clusters,
- MATROSKA_ID_CLUSTER, &cluster);
+ if (matroska->current_id)
pos -= 4; /* sizeof the ID which was already read */
- matroska->has_cluster_id = 0;
- } else
- res = ebml_parse(matroska, matroska_clusters, &cluster);
+ res = ebml_parse(matroska, matroska_clusters, &cluster);
blocks_list = &cluster.blocks;
blocks = blocks_list->elem;
for (i=0; i<blocks_list->nb_elem; i++)
- if (blocks[i].bin.size > 0) {
+ if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
res=matroska_parse_block(matroska,
blocks[i].bin.data, blocks[i].bin.size,
}
AVInputFormat matroska_demuxer = {
- "matroska",
- NULL_IF_CONFIG_SMALL("Matroska file format"),
+ "matroska,webm",
+ NULL_IF_CONFIG_SMALL("Matroska/WebM file format"),
sizeof(MatroskaDemuxContext),
matroska_probe,
matroska_read_header,
matroska_read_packet,
matroska_read_close,
matroska_read_seek,
- .metadata_conv = ff_mkv_metadata_conv,
};