2 * Matroska file demuxer
3 * Copyright (c) 2003-2008 The FFmpeg Project
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Matroska file demuxer
25 * @author Ronald Bultje <rbultje@ronald.bitfreak.net>
26 * @author with a little help from Moritz Bunkus <moritz@bunkus.org>
27 * @author totally reworked by Aurelien Jacobs <aurel@gnuage.org>
28 * @see specs available on the Matroska project page: http://www.matroska.org/
34 #include "avio_internal.h"
35 /* For ff_codec_get_id(). */
40 #include "libavcodec/bytestream.h"
41 #include "libavcodec/mpeg4audio.h"
42 #include "libavutil/base64.h"
43 #include "libavutil/intfloat.h"
44 #include "libavutil/intreadwrite.h"
45 #include "libavutil/avstring.h"
46 #include "libavutil/lzo.h"
47 #include "libavutil/dict.h"
68 typedef const struct EbmlSyntax {
77 const struct EbmlSyntax *n;
97 uint64_t doctype_version;
103 } MatroskaTrackCompression;
108 } MatroskaTrackEncryption;
113 MatroskaTrackCompression compression;
114 MatroskaTrackEncryption encryption;
115 } MatroskaTrackEncoding;
119 uint64_t display_width;
120 uint64_t display_height;
121 uint64_t pixel_width;
122 uint64_t pixel_height;
124 uint64_t stereo_mode;
126 } MatroskaTrackVideo;
130 double out_samplerate;
134 /* real audio header (extracted from extradata) */
141 uint64_t buf_timecode;
143 } MatroskaTrackAudio;
148 } MatroskaTrackPlane;
151 EbmlList combine_planes;
152 } MatroskaTrackOperation;
163 uint64_t default_duration;
164 uint64_t flag_default;
165 uint64_t flag_forced;
166 MatroskaTrackVideo video;
167 MatroskaTrackAudio audio;
168 MatroskaTrackOperation operation;
172 int64_t end_timecode;
174 uint64_t max_block_additional_id;
184 } MatroskaAttachement;
222 MatroskaTagTarget target;
242 AVFormatContext *ctx;
246 MatroskaLevel levels[EBML_MAX_DEPTH];
255 EbmlList attachments;
261 /* byte position of the segment inside the stream */
262 int64_t segment_start;
264 /* the packet queue */
271 /* What to skip before effectively reading a packet. */
272 int skip_to_keyframe;
273 uint64_t skip_to_timecode;
275 /* File has a CUES element, but we defer parsing until it is needed. */
276 int cues_parsing_deferred;
278 int current_cluster_num_blocks;
279 int64_t current_cluster_pos;
280 MatroskaCluster current_cluster;
282 /* File has SSA subtitles which prevent incremental cluster parsing. */
284 } MatroskaDemuxContext;
291 uint64_t additional_id;
/* Syntax table for the children of the EBML header element. Parsed values
 * are stored in an Ebml struct at the given offsets; the {.u=...}/{.s=...}
 * initialiser is the default value for that field. The EBML_NONE entries
 * name known IDs for which no storage offset is given. */
295 static EbmlSyntax ebml_header[] = {
296 { EBML_ID_EBMLREADVERSION, EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
297 { EBML_ID_EBMLMAXSIZELENGTH, EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
298 { EBML_ID_EBMLMAXIDLENGTH, EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
299 { EBML_ID_DOCTYPE, EBML_STR, 0, offsetof(Ebml,doctype), {.s="(none)"} },
300 { EBML_ID_DOCTYPEREADVERSION, EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
301 { EBML_ID_EBMLVERSION, EBML_NONE },
302 { EBML_ID_DOCTYPEVERSION, EBML_NONE },
/* Top-level table: the EBML header is a nested element described by
 * ebml_header. */
306 static EbmlSyntax ebml_syntax[] = {
307 { EBML_ID_HEADER, EBML_NEST, 0, 0, {.n=ebml_header} },
/* Children of the Info element. Values land directly in the
 * MatroskaDemuxContext; time_scale defaults to 1000000. */
311 static EbmlSyntax matroska_info[] = {
312 { MATROSKA_ID_TIMECODESCALE, EBML_UINT, 0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
313 { MATROSKA_ID_DURATION, EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
314 { MATROSKA_ID_TITLE, EBML_UTF8, 0, offsetof(MatroskaDemuxContext,title) },
315 { MATROSKA_ID_WRITINGAPP, EBML_NONE },
316 { MATROSKA_ID_MUXINGAPP, EBML_NONE },
317 { MATROSKA_ID_DATEUTC, EBML_BIN, 0, offsetof(MatroskaDemuxContext,date_utc) },
318 { MATROSKA_ID_SEGMENTUID, EBML_NONE },
/* Children of a track's Video element, stored in MatroskaTrackVideo.
 * display_width/display_height default to -1; written into a uint64_t this
 * is an all-ones value, presumably an "unset" marker for later code
 * (confirm against the users of these fields). The cropping, display-unit,
 * interlace and aspect-ratio IDs are listed with EBML_NONE and no offset. */
322 static EbmlSyntax matroska_track_video[] = {
323 { MATROSKA_ID_VIDEOFRAMERATE, EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
324 { MATROSKA_ID_VIDEODISPLAYWIDTH, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width), {.u=-1} },
325 { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height), {.u=-1} },
326 { MATROSKA_ID_VIDEOPIXELWIDTH, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
327 { MATROSKA_ID_VIDEOPIXELHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
328 { MATROSKA_ID_VIDEOCOLORSPACE, EBML_BIN, 0, offsetof(MatroskaTrackVideo,color_space) },
329 { MATROSKA_ID_VIDEOSTEREOMODE, EBML_UINT, 0, offsetof(MatroskaTrackVideo,stereo_mode) },
330 { MATROSKA_ID_VIDEOALPHAMODE, EBML_UINT, 0, offsetof(MatroskaTrackVideo,alpha_mode) },
331 { MATROSKA_ID_VIDEOPIXELCROPB, EBML_NONE },
332 { MATROSKA_ID_VIDEOPIXELCROPT, EBML_NONE },
333 { MATROSKA_ID_VIDEOPIXELCROPL, EBML_NONE },
334 { MATROSKA_ID_VIDEOPIXELCROPR, EBML_NONE },
335 { MATROSKA_ID_VIDEODISPLAYUNIT, EBML_NONE },
336 { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
337 { MATROSKA_ID_VIDEOASPECTRATIO, EBML_NONE },
/* Children of a track's Audio element, stored in MatroskaTrackAudio.
 * Defaults: samplerate 8000.0, channels 1; out_samplerate and bitdepth
 * have no explicit default in this table. */
341 static EbmlSyntax matroska_track_audio[] = {
342 { MATROSKA_ID_AUDIOSAMPLINGFREQ, EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
343 { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
344 { MATROSKA_ID_AUDIOBITDEPTH, EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
345 { MATROSKA_ID_AUDIOCHANNELS, EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
/* Children of a ContentCompression element: the algorithm number
 * (default 0) and its binary settings, stored in MatroskaTrackCompression. */
349 static EbmlSyntax matroska_track_encoding_compression[] = {
350 { MATROSKA_ID_ENCODINGCOMPALGO, EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
351 { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
/* Children of a ContentEncryption element. Only the algorithm number and
 * the key id are stored (in MatroskaTrackEncryption); the AES-settings and
 * signature-related IDs are listed with EBML_NONE and no offset. */
355 static EbmlSyntax matroska_track_encoding_encryption[] = {
356 { MATROSKA_ID_ENCODINGENCALGO, EBML_UINT, 0, offsetof(MatroskaTrackEncryption,algo), {.u=0} },
357 { MATROSKA_ID_ENCODINGENCKEYID, EBML_BIN, 0, offsetof(MatroskaTrackEncryption,key_id) },
358 { MATROSKA_ID_ENCODINGENCAESSETTINGS, EBML_NONE },
359 { MATROSKA_ID_ENCODINGSIGALGO, EBML_NONE },
360 { MATROSKA_ID_ENCODINGSIGHASHALGO, EBML_NONE },
361 { MATROSKA_ID_ENCODINGSIGKEYID, EBML_NONE },
362 { MATROSKA_ID_ENCODINGSIGNATURE, EBML_NONE },
/* Children of one ContentEncoding element, stored in
 * MatroskaTrackEncoding: scope (default 1), type (default 0) and the
 * nested compression / encryption descriptions. */
365 static EbmlSyntax matroska_track_encoding[] = {
366 { MATROSKA_ID_ENCODINGSCOPE, EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
367 { MATROSKA_ID_ENCODINGTYPE, EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
368 { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
369 { MATROSKA_ID_ENCODINGENCRYPTION, EBML_NEST, 0, offsetof(MatroskaTrackEncoding,encryption), {.n=matroska_track_encoding_encryption} },
370 { MATROSKA_ID_ENCODINGORDER, EBML_NONE },
/* Children of ContentEncodings: each ContentEncoding is parsed into a new
 * MatroskaTrackEncoding-sized element of the track's `encodings` list. */
374 static EbmlSyntax matroska_track_encodings[] = {
375 { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
/* Children of one TrackPlane element: uid and type, stored in
 * MatroskaTrackPlane. */
379 static EbmlSyntax matroska_track_plane[] = {
380 { MATROSKA_ID_TRACKPLANEUID, EBML_UINT, 0, offsetof(MatroskaTrackPlane,uid) },
381 { MATROSKA_ID_TRACKPLANETYPE, EBML_UINT, 0, offsetof(MatroskaTrackPlane,type) },
/* Children of TrackCombinePlanes: each TrackPlane is parsed into a new
 * element of MatroskaTrackOperation.combine_planes. */
385 static EbmlSyntax matroska_track_combine_planes[] = {
386 { MATROSKA_ID_TRACKPLANE, EBML_NEST, sizeof(MatroskaTrackPlane), offsetof(MatroskaTrackOperation,combine_planes), {.n=matroska_track_plane} },
/* Children of TrackOperation: only TrackCombinePlanes is described; it is
 * parsed in place (offset 0, no list element). */
390 static EbmlSyntax matroska_track_operation[] = {
391 { MATROSKA_ID_TRACKCOMBINEPLANES, EBML_NEST, 0, 0, {.n=matroska_track_combine_planes} },
/* Children of one TrackEntry, stored in MatroskaTrack. Defaults set here:
 * language "eng", time_scale 1.0, flag_default 1, flag_forced 0. Video,
 * Audio and Operation are nested into the corresponding sub-structs;
 * ContentEncodings is nested in place (offset 0). The trailing EBML_NONE
 * entries list known IDs for which nothing is stored. */
395 static EbmlSyntax matroska_track[] = {
396 { MATROSKA_ID_TRACKNUMBER, EBML_UINT, 0, offsetof(MatroskaTrack,num) },
397 { MATROSKA_ID_TRACKNAME, EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
398 { MATROSKA_ID_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
399 { MATROSKA_ID_TRACKTYPE, EBML_UINT, 0, offsetof(MatroskaTrack,type) },
400 { MATROSKA_ID_CODECID, EBML_STR, 0, offsetof(MatroskaTrack,codec_id) },
401 { MATROSKA_ID_CODECPRIVATE, EBML_BIN, 0, offsetof(MatroskaTrack,codec_priv) },
402 { MATROSKA_ID_TRACKLANGUAGE, EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
403 { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
404 { MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
405 { MATROSKA_ID_TRACKFLAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
406 { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
407 { MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
408 { MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
409 { MATROSKA_ID_TRACKOPERATION, EBML_NEST, 0, offsetof(MatroskaTrack,operation), {.n=matroska_track_operation} },
410 { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
411 { MATROSKA_ID_TRACKMAXBLKADDID, EBML_UINT, 0, offsetof(MatroskaTrack,max_block_additional_id) },
412 { MATROSKA_ID_TRACKFLAGENABLED, EBML_NONE },
413 { MATROSKA_ID_TRACKFLAGLACING, EBML_NONE },
414 { MATROSKA_ID_CODECNAME, EBML_NONE },
415 { MATROSKA_ID_CODECDECODEALL, EBML_NONE },
416 { MATROSKA_ID_CODECINFOURL, EBML_NONE },
417 { MATROSKA_ID_CODECDOWNLOADURL, EBML_NONE },
418 { MATROSKA_ID_TRACKMINCACHE, EBML_NONE },
419 { MATROSKA_ID_TRACKMAXCACHE, EBML_NONE },
/* Children of Tracks: each TrackEntry becomes a new MatroskaTrack-sized
 * element of the demuxer's `tracks` list. */
423 static EbmlSyntax matroska_tracks[] = {
424 { MATROSKA_ID_TRACKENTRY, EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
/* Children of one AttachedFile: uid, file name, MIME type and payload are
 * stored in MatroskaAttachement; the description ID is listed with
 * EBML_NONE and no offset. */
428 static EbmlSyntax matroska_attachment[] = {
429 { MATROSKA_ID_FILEUID, EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
430 { MATROSKA_ID_FILENAME, EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
431 { MATROSKA_ID_FILEMIMETYPE, EBML_STR, 0, offsetof(MatroskaAttachement,mime) },
432 { MATROSKA_ID_FILEDATA, EBML_BIN, 0, offsetof(MatroskaAttachement,bin) },
433 { MATROSKA_ID_FILEDESC, EBML_NONE },
/* Children of Attachments: each AttachedFile becomes a new element of the
 * demuxer's `attachments` list. */
437 static EbmlSyntax matroska_attachments[] = {
438 { MATROSKA_ID_ATTACHEDFILE, EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
/* Children of ChapterDisplay: the chapter string is stored as the
 * MatroskaChapter title; the language ID is listed with EBML_NONE. */
442 static EbmlSyntax matroska_chapter_display[] = {
443 { MATROSKA_ID_CHAPSTRING, EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
444 { MATROSKA_ID_CHAPLANG, EBML_NONE },
/* Children of one ChapterAtom, stored in MatroskaChapter. start and end
 * default to AV_NOPTS_VALUE. ChapterDisplay is nested in place so its
 * string lands in the same MatroskaChapter. A ChapterAtom nested inside
 * this one is listed with EBML_NONE, i.e. no storage is described for
 * sub-chapters. */
448 static EbmlSyntax matroska_chapter_entry[] = {
449 { MATROSKA_ID_CHAPTERTIMESTART, EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
450 { MATROSKA_ID_CHAPTERTIMEEND, EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
451 { MATROSKA_ID_CHAPTERUID, EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
452 { MATROSKA_ID_CHAPTERDISPLAY, EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
453 { MATROSKA_ID_CHAPTERFLAGHIDDEN, EBML_NONE },
454 { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
455 { MATROSKA_ID_CHAPTERPHYSEQUIV, EBML_NONE },
456 { MATROSKA_ID_CHAPTERATOM, EBML_NONE },
/* Children of one EditionEntry: each ChapterAtom becomes a new element of
 * the demuxer's `chapters` list; the edition uid/flag IDs are listed with
 * EBML_NONE. */
460 static EbmlSyntax matroska_chapter[] = {
461 { MATROSKA_ID_CHAPTERATOM, EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
462 { MATROSKA_ID_EDITIONUID, EBML_NONE },
463 { MATROSKA_ID_EDITIONFLAGHIDDEN, EBML_NONE },
464 { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
465 { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
/* Children of Chapters: EditionEntry is nested in place, so chapters from
 * every edition are appended to the same list. */
469 static EbmlSyntax matroska_chapters[] = {
470 { MATROSKA_ID_EDITIONENTRY, EBML_NEST, 0, 0, {.n=matroska_chapter} },
/* Children of CueTrackPositions: track number and cluster position are
 * stored in MatroskaIndexPos; the block-number ID is listed with
 * EBML_NONE. */
474 static EbmlSyntax matroska_index_pos[] = {
475 { MATROSKA_ID_CUETRACK, EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
476 { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos) },
477 { MATROSKA_ID_CUEBLOCKNUMBER, EBML_NONE },
/* Children of one cue point: its time, plus a list of track positions
 * (one MatroskaIndexPos element per CueTrackPositions). */
481 static EbmlSyntax matroska_index_entry[] = {
482 { MATROSKA_ID_CUETIME, EBML_UINT, 0, offsetof(MatroskaIndex,time) },
483 { MATROSKA_ID_CUETRACKPOSITION, EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
/* Children of Cues: each point entry becomes a new MatroskaIndex element
 * of the demuxer's `index` list. */
487 static EbmlSyntax matroska_index[] = {
488 { MATROSKA_ID_POINTENTRY, EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
/* Children of one SimpleTag, stored in MatroskaTag. The language defaults
 * to "und". Two different IDs (TAGDEFAULT and TAGDEFAULT_BUG) write the
 * same `def` field. The table refers to itself: a nested SimpleTag is
 * parsed with this same table into a new element of the `sub` list. */
492 static EbmlSyntax matroska_simpletag[] = {
493 { MATROSKA_ID_TAGNAME, EBML_UTF8, 0, offsetof(MatroskaTag,name) },
494 { MATROSKA_ID_TAGSTRING, EBML_UTF8, 0, offsetof(MatroskaTag,string) },
495 { MATROSKA_ID_TAGLANG, EBML_STR, 0, offsetof(MatroskaTag,lang), {.s="und"} },
496 { MATROSKA_ID_TAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTag,def) },
497 { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0, offsetof(MatroskaTag,def) },
498 { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
/* Children of a tag's Targets element, stored in MatroskaTagTarget.
 * typevalue defaults to 50; the track/chapter/attachment UIDs have no
 * explicit default in this table. */
502 static EbmlSyntax matroska_tagtargets[] = {
503 { MATROSKA_ID_TAGTARGETS_TYPE, EBML_STR, 0, offsetof(MatroskaTagTarget,type) },
504 { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
505 { MATROSKA_ID_TAGTARGETS_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
506 { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
507 { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
/* Children of one Tag: a list of SimpleTag elements plus a single Targets
 * struct, both stored in MatroskaTags. */
511 static EbmlSyntax matroska_tag[] = {
512 { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
513 { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
/* Children of Tags: each Tag becomes a new MatroskaTags element of the
 * demuxer's `tags` list. */
517 static EbmlSyntax matroska_tags[] = {
518 { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
/* Children of one seek entry: the referenced element ID and its position,
 * stored in MatroskaSeekhead. pos defaults to -1 (all-ones once stored as
 * an unsigned value), presumably marking "no position given". */
522 static EbmlSyntax matroska_seekhead_entry[] = {
523 { MATROSKA_ID_SEEKID, EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
524 { MATROSKA_ID_SEEKPOSITION, EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
/* Children of SeekHead: each entry becomes a new MatroskaSeekhead element
 * of the demuxer's `seekhead` list. */
528 static EbmlSyntax matroska_seekhead[] = {
529 { MATROSKA_ID_SEEKENTRY, EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
/* Children of the Segment element. Every metadata section is nested in
 * place (offset 0), so the sub-tables write straight into the
 * MatroskaDemuxContext. A Cluster ID is marked EBML_STOP: ebml_parse_elem()
 * returns 1 for such an entry without reading the element's length. */
533 static EbmlSyntax matroska_segment[] = {
534 { MATROSKA_ID_INFO, EBML_NEST, 0, 0, {.n=matroska_info } },
535 { MATROSKA_ID_TRACKS, EBML_NEST, 0, 0, {.n=matroska_tracks } },
536 { MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, {.n=matroska_attachments} },
537 { MATROSKA_ID_CHAPTERS, EBML_NEST, 0, 0, {.n=matroska_chapters } },
538 { MATROSKA_ID_CUES, EBML_NEST, 0, 0, {.n=matroska_index } },
539 { MATROSKA_ID_TAGS, EBML_NEST, 0, 0, {.n=matroska_tags } },
540 { MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, {.n=matroska_seekhead } },
541 { MATROSKA_ID_CLUSTER, EBML_STOP },
/* Top-level table for the file body: a Segment element described by
 * matroska_segment. */
545 static EbmlSyntax matroska_segments[] = {
546 { MATROSKA_ID_SEGMENT, EBML_NEST, 0, 0, {.n=matroska_segment } },
/* Children of BlockMore: the additional-data id and its binary payload,
 * stored in MatroskaBlock. */
550 static EbmlSyntax matroska_blockmore[] = {
551 { MATROSKA_ID_BLOCKADDID, EBML_UINT, 0, offsetof(MatroskaBlock,additional_id) },
552 { MATROSKA_ID_BLOCKADDITIONAL, EBML_BIN, 0, offsetof(MatroskaBlock,additional) },
/* Children of BlockAdditions: BlockMore is nested in place, so its fields
 * land in the enclosing MatroskaBlock. */
556 static EbmlSyntax matroska_blockadditions[] = {
557 { MATROSKA_ID_BLOCKMORE, EBML_NEST, 0, 0, {.n=matroska_blockmore} },
/* Children of a BlockGroup, stored in MatroskaBlock. Block and SimpleBlock
 * payloads share the same `bin` field. The last entry uses the literal
 * ID 1 only to carry a default of 1 for `non_simple`.
 * NOTE(review): presumably that default is applied only when the block is
 * parsed as a nested BlockGroup and not when a SimpleBlock is passed
 * through from the cluster table - confirm in ebml_parse_nest() callers. */
561 static EbmlSyntax matroska_blockgroup[] = {
562 { MATROSKA_ID_BLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) },
563 { MATROSKA_ID_BLOCKADDITIONS, EBML_NEST, 0, 0, {.n=matroska_blockadditions} },
564 { MATROSKA_ID_SIMPLEBLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) },
565 { MATROSKA_ID_BLOCKDURATION, EBML_UINT, 0, offsetof(MatroskaBlock,duration) },
566 { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
567 { 1, EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
/* Children of a Cluster, stored in MatroskaCluster. Both BlockGroup and
 * SimpleBlock add a new MatroskaBlock element to `blocks`; SimpleBlock is
 * EBML_PASS, which ebml_parse_elem() handles by looking the same ID up
 * again in matroska_blockgroup instead of opening a nested level. */
571 static EbmlSyntax matroska_cluster[] = {
572 { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
573 { MATROSKA_ID_BLOCKGROUP, EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
574 { MATROSKA_ID_SIMPLEBLOCK, EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
575 { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
576 { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
/* Table used at cluster level: a Cluster is nested via matroska_cluster,
 * while Info, Cues, Tags and SeekHead are listed with EBML_NONE (no
 * storage described at this point). */
580 static EbmlSyntax matroska_clusters[] = {
581 { MATROSKA_ID_CLUSTER, EBML_NEST, 0, 0, {.n=matroska_cluster} },
582 { MATROSKA_ID_INFO, EBML_NONE },
583 { MATROSKA_ID_CUES, EBML_NONE },
584 { MATROSKA_ID_TAGS, EBML_NONE },
585 { MATROSKA_ID_SEEKHEAD, EBML_NONE },
/* Variant of the cluster table that additionally lists the Segment-level
 * metadata IDs (EBML_NONE) and marks a following Cluster ID as EBML_STOP,
 * so parsing with this table stops when the next cluster begins. */
589 static EbmlSyntax matroska_cluster_incremental_parsing[] = {
590 { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
591 { MATROSKA_ID_BLOCKGROUP, EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
592 { MATROSKA_ID_SIMPLEBLOCK, EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
593 { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
594 { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
595 { MATROSKA_ID_INFO, EBML_NONE },
596 { MATROSKA_ID_CUES, EBML_NONE },
597 { MATROSKA_ID_TAGS, EBML_NONE },
598 { MATROSKA_ID_SEEKHEAD, EBML_NONE },
599 { MATROSKA_ID_CLUSTER, EBML_STOP },
/* Cluster table that reads only the cluster timecode: the first
 * BlockGroup or SimpleBlock ID is EBML_STOP, so parsing halts before any
 * block is read. */
603 static EbmlSyntax matroska_cluster_incremental[] = {
604 { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
605 { MATROSKA_ID_BLOCKGROUP, EBML_STOP },
606 { MATROSKA_ID_SIMPLEBLOCK, EBML_STOP },
607 { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
608 { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
/* Cluster-level table for incremental parsing: same shape as
 * matroska_clusters but nests a Cluster via matroska_cluster_incremental. */
612 static EbmlSyntax matroska_clusters_incremental[] = {
613 { MATROSKA_ID_CLUSTER, EBML_NEST, 0, 0, {.n=matroska_cluster_incremental} },
614 { MATROSKA_ID_INFO, EBML_NONE },
615 { MATROSKA_ID_CUES, EBML_NONE },
616 { MATROSKA_ID_TAGS, EBML_NONE },
617 { MATROSKA_ID_SEEKHEAD, EBML_NONE },
/* Doctype strings that matroska_probe() searches for inside the EBML
 * header bytes. */
621 static const char *const matroska_doctypes[] = { "matroska", "webm" };
/* Try to recover after a parse failure: reset the parser state, restart
 * reading at last_pos + 1 and scan forward for the ID of a known top-level
 * element. */
623 static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
625 AVIOContext *pb = matroska->ctx->pb;
/* Drop the pending element ID and every open nesting level. */
627 matroska->current_id = 0;
628 matroska->num_levels = 0;
630 // seek to next position to resync from
/* The avio_tell() test also rejects a seek that did not move forward. */
631 if (avio_seek(pb, last_pos + 1, SEEK_SET) < 0 || avio_tell(pb) <= last_pos)
636 // try to find a toplevel element
637 while (!url_feof(pb)) {
638 if (id == MATROSKA_ID_INFO || id == MATROSKA_ID_TRACKS ||
639 id == MATROSKA_ID_CUES || id == MATROSKA_ID_TAGS ||
640 id == MATROSKA_ID_SEEKHEAD || id == MATROSKA_ID_ATTACHMENTS ||
641 id == MATROSKA_ID_CLUSTER || id == MATROSKA_ID_CHAPTERS)
/* Match: publish the ID as the parser's pending element. */
643 matroska->current_id = id;
/* Shift the next stream byte into the rolling ID value. */
646 id = (id << 8) | avio_r8(pb);
654 * Return: Whether we reached the end of a level in the hierarchy or not.
/* Check whether the innermost open level is finished and, if so, pop it.
 * A level counts as finished when the read position has advanced at least
 * `length` bytes past its start, or when an element ID is already pending
 * in matroska->current_id. Nothing is popped when no level is open. */
656 static int ebml_level_end(MatroskaDemuxContext *matroska)
658 AVIOContext *pb = matroska->ctx->pb;
659 int64_t pos = avio_tell(pb);
661 if (matroska->num_levels > 0) {
662 MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
663 if (pos - level->start >= level->length || matroska->current_id) {
664 matroska->num_levels--;
672 * Read: an "EBML number", which is defined as a variable-length
673 * array of bytes. The first byte indicates the length by giving a
674 * number of 0-bits followed by a one. The position of the first
675 * "one" bit inside the first byte indicates the length of this
677 * Returns: number of bytes read, < 0 on error
/* Read one variable-length EBML number from pb.
 * max_size is the largest encoded length (in bytes) that is accepted;
 * longer encodings are rejected with AVERROR_INVALIDDATA. `number` is the
 * output parameter for the decoded value. */
679 static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
680 int max_size, uint64_t *number)
685 /* The first byte tells us the length in bytes - avio_r8() can normally
686 * return 0, but since that's not a valid first ebmlID byte, we can
687 * use it safely here to catch EOS. */
688 if (!(total = avio_r8(pb))) {
689 /* we might encounter EOS here */
691 int64_t pos = avio_tell(pb);
/* NOTE(review): pos is int64_t but is printed with PRIu64/PRIx64. */
692 av_log(matroska->ctx, AV_LOG_ERROR,
693 "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
/* Prefer the I/O layer's own error code; fall back to EIO. */
695 return pb->error ? pb->error : AVERROR(EIO);
700 /* get the length of the EBML number */
/* NOTE(review): ff_log2_tab is defined elsewhere; presumably it maps a
 * byte to the index of its highest set bit - confirm. */
701 read = 8 - ff_log2_tab[total];
702 if (read > max_size) {
/* avio_tell() is already one byte past the size tag, hence the - 1. */
703 int64_t pos = avio_tell(pb) - 1;
704 av_log(matroska->ctx, AV_LOG_ERROR,
705 "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
706 (uint8_t) total, pos, pos);
707 return AVERROR_INVALIDDATA;
710 /* read out length */
/* Toggle off the bit at index ff_log2_tab[total] (the length marker, under
 * the assumption above) so only value bits remain. */
711 total ^= 1 << ff_log2_tab[total];
/* Append a following byte below the bits collected so far. */
713 total = (total << 8) | avio_r8(pb);
721 * Read an EBML length value.
722 * This needs special handling for the "unknown length" case which has multiple
/* Read an element length (encoded in at most 8 bytes) via ebml_read_num().
 * If every value bit of the res-byte encoding is set, i.e.
 * *number + 1 == 2^(7*res), the value is replaced by the single constant
 * 0xffffffffffffff, so that this case has one representation regardless of
 * how many bytes were used to encode it. */
725 static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
728 int res = ebml_read_num(matroska, pb, 8, number);
729 if (res > 0 && *number + 1 == 1ULL << (7 * res))
730 *number = 0xffffffffffffffULL;
735 * Read the next element as an unsigned int.
736 * 0 is success, < 0 is failure.
/* Read an unsigned integer payload from pb into *num.
 * NOTE(review): the condition guarding the AVERROR_INVALIDDATA return is
 * not visible here; presumably it rejects a size that does not fit in
 * 64 bits - confirm. */
738 static int ebml_read_uint(AVIOContext *pb, int size, uint64_t *num)
743 return AVERROR_INVALIDDATA;
745 /* big-endian ordering; build up number */
748 *num = (*num << 8) | avio_r8(pb);
754 * Read the next element as a float.
755 * 0 is success, < 0 is failure.
/* Read a floating-point payload from pb into *num.
 * A 4-byte payload is read with avio_rb32() and converted with
 * av_int2float(); an 8-byte payload is read with avio_rb64() and converted
 * with av_int2double(). The chain's first branch is not visible here; any
 * size not handled by one of the branches yields AVERROR_INVALIDDATA. */
757 static int ebml_read_float(AVIOContext *pb, int size, double *num)
761 } else if (size == 4) {
762 *num = av_int2float(avio_rb32(pb));
763 } else if (size == 8){
764 *num = av_int2double(avio_rb64(pb));
766 return AVERROR_INVALIDDATA;
772 * Read the next element as an ASCII string.
773 * 0 is success, < 0 is failure.
/* Read a string payload of `size` bytes from pb into a newly allocated
 * buffer of size + 1 bytes. Returns AVERROR(ENOMEM) if the allocation
 * fails. A read that delivers fewer than `size` bytes is treated
 * separately (handling not visible here). */
775 static int ebml_read_ascii(AVIOContext *pb, int size, char **str)
779 /* EBML strings are usually not 0-terminated, so we allocate one
780 * byte more, read the string and NULL-terminate it ourselves. */
781 if (!(res = av_malloc(size + 1)))
782 return AVERROR(ENOMEM);
783 if (avio_read(pb, (uint8_t *) res, size) != size) {
795 * Read the next element as binary data.
796 * 0 is success, < 0 is failure.
/* Read `length` bytes of binary payload from pb into bin->data.
 * The buffer is (re)sized with av_fast_padded_malloc(), bin->pos records
 * the stream position where the payload starts, and bin->data is freed
 * again if fewer than `length` bytes could be read. */
798 static int ebml_read_binary(AVIOContext *pb, int length, EbmlBin *bin)
800 av_fast_padded_malloc(&bin->data, &bin->size, length);
802 return AVERROR(ENOMEM);
805 bin->pos = avio_tell(pb);
806 if (avio_read(pb, bin->data, length) != length) {
807 av_freep(&bin->data);
816 * Read the next element, but only the header. The contents
817 * are supposed to be sub-elements which can be read separately.
818 * 0 is success, < 0 is failure.
/* Open a new nesting level for a master element whose payload is `length`
 * bytes long. Fails with AVERROR(ENOSYS) once EBML_MAX_DEPTH levels are
 * already open; otherwise the new level records the current stream
 * position as its start together with the given length. */
820 static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
822 AVIOContext *pb = matroska->ctx->pb;
823 MatroskaLevel *level;
825 if (matroska->num_levels >= EBML_MAX_DEPTH) {
826 av_log(matroska->ctx, AV_LOG_ERROR,
827 "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
828 return AVERROR(ENOSYS);
/* Push: take the next free slot and bump the level count. */
831 level = &matroska->levels[matroska->num_levels++];
832 level->start = avio_tell(pb);
833 level->length = length;
839 * Read signed/unsigned "EBML" numbers.
840 * Return: number of bytes processed, < 0 on error
/* Decode an EBML number from a memory buffer: `data`/`size` are wrapped in
 * a temporary AVIOContext and parsed with ebml_read_num(), accepting an
 * encoding of at most min(size, 8) bytes. Returns what ebml_read_num()
 * returns. */
842 static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
843 uint8_t *data, uint32_t size, uint64_t *num)
846 ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
847 return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
851 * Same as above, but signed.
/* Decode a signed EBML number from a memory buffer. The value is first
 * read as unsigned; the bias 2^(7*res - 1) - 1, where res is the encoded
 * length in bytes, is then subtracted to obtain the signed result. */
853 static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
854 uint8_t *data, uint32_t size, int64_t *num)
859 /* read as unsigned number first */
860 if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
863 /* make signed (weird way) */
864 *num = unum - ((1LL << (7*res - 1)) - 1);
/* Forward declaration: ebml_parse_id() below calls ebml_parse_elem()
 * before its definition appears. */
869 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
870 EbmlSyntax *syntax, void *data);
/* Look `id` up in `syntax` and parse the element with the entry found.
 * The table is scanned until an entry with id 0 is reached; if no entry
 * matched, that terminating entry is the one handed to ebml_parse_elem(). */
872 static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
873 uint32_t id, void *data)
876 for (i=0; syntax[i].id; i++)
877 if (id == syntax[i].id)
/* A Cluster ID that is not in the table, seen while the innermost level
 * has the "unknown length" value, ends that level instead of being parsed. */
879 if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
880 matroska->num_levels > 0 &&
881 matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
882 return 0; // we reached the end of an unknown size cluster
/* Unmatched IDs other than Void and CRC-32 are reported; they are only
 * fatal when the caller asked for AV_EF_EXPLODE. */
883 if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
884 av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
885 if (matroska->ctx->error_recognition & AV_EF_EXPLODE)
886 return AVERROR_INVALIDDATA;
888 return ebml_parse_elem(matroska, &syntax[i], data);
/* Parse the next element using `syntax`. If no element ID is pending, one
 * is read first (encoded in at most 4 bytes) and kept in
 * matroska->current_id; the element is then dispatched through
 * ebml_parse_id(). */
891 static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
894 if (!matroska->current_id) {
896 int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
/* ebml_read_num() returns the value without its length-marker bit; OR the
 * bit at position 7*res back in to get the full ID as used in the tables. */
899 matroska->current_id = id | 1 << 7*res;
901 return ebml_parse_id(matroska, syntax, matroska->current_id, data);
/* Parse all children of the current level into `data`.
 * First pass: walk the table and write each entry's default into its
 * field (integer, floating-point or duplicated string, depending on the
 * entry type). Second pass: parse child elements until one fails or
 * ebml_level_end() reports that the level is finished. */
904 static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
909 for (i=0; syntax[i].id; i++)
910 switch (syntax[i].type) {
912 *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
915 *(double *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
/* NOTE(review): no check of the av_strdup() result is visible here. */
919 *(char **)((char *)data+syntax[i].data_offset) = av_strdup(syntax[i].def.s);
923 while (!res && !ebml_level_end(matroska))
924 res = ebml_parse(matroska, syntax, data);
/* Parse one element according to a single syntax entry.
 * `data` is the base of the struct being filled; the entry's data_offset
 * selects the field, and for list entries a fresh element is appended to
 * the EbmlList found there. Depending on the entry type the payload is
 * read as a scalar/string/binary value, opened as a nested level, passed
 * on to another table, or skipped. */
929 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
930 EbmlSyntax *syntax, void *data)
/* Per-type upper bound on the declared payload length. Types without an
 * initialiser stay 0, which the check below treats as "no limit". */
932 static const uint64_t max_lengths[EBML_TYPE_COUNT] = {
935 // max. 16 MB for strings
936 [EBML_STR] = 0x1000000,
937 [EBML_UTF8] = 0x1000000,
938 // max. 256 MB for binary data
939 [EBML_BIN] = 0x10000000,
940 // no limits for anything else
942 AVIOContext *pb = matroska->ctx->pb;
943 uint32_t id = syntax->id;
/* Move to the field this entry stores into. */
948 data = (char *)data + syntax->data_offset;
/* List entry: make room for one more element, zero it and parse into it. */
949 if (syntax->list_elem_size) {
950 EbmlList *list = data;
/* NOTE(review): the size product has no visible overflow check. */
951 newelem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
953 return AVERROR(ENOMEM);
954 list->elem = newelem;
955 data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
956 memset(data, 0, syntax->list_elem_size);
/* PASS and STOP entries do not consume the element: the pending ID is kept
 * and no length is read for them. */
960 if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
961 matroska->current_id = 0;
962 if ((res = ebml_read_length(matroska, pb, &length)) < 0)
964 if (max_lengths[syntax->type] && length > max_lengths[syntax->type]) {
965 av_log(matroska->ctx, AV_LOG_ERROR,
966 "Invalid length 0x%"PRIx64" > 0x%"PRIx64" for syntax element %i\n",
967 length, max_lengths[syntax->type], syntax->type);
968 return AVERROR_INVALIDDATA;
972 switch (syntax->type) {
973 case EBML_UINT: res = ebml_read_uint (pb, length, data); break;
974 case EBML_FLOAT: res = ebml_read_float (pb, length, data); break;
976 case EBML_UTF8: res = ebml_read_ascii (pb, length, data); break;
977 case EBML_BIN: res = ebml_read_binary(pb, length, data); break;
/* Nested element: open a level, remember where the Segment payload starts
 * (segment_start), then parse the children with the sub-table. */
978 case EBML_NEST: if ((res=ebml_read_master(matroska, length)) < 0)
980 if (id == MATROSKA_ID_SEGMENT)
981 matroska->segment_start = avio_tell(matroska->ctx->pb);
982 return ebml_parse_nest(matroska, syntax->def.n, data);
/* PASS: look the same ID up again in the table given by def.n. */
983 case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data);
/* STOP: report 1 to the caller without touching the stream. */
984 case EBML_STOP: return 1;
/* Remaining case: the payload is skipped rather than stored.
 * NOTE(review): the statement taken when ffio_limit() shortens the length
 * is not visible here. */
986 if(ffio_limit(pb, length) != length)
988 return avio_skip(pb,length)<0 ? AVERROR(EIO) : 0;
/* Log the two common failure codes of the typed readers above. */
990 if (res == AVERROR_INVALIDDATA)
991 av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
992 else if (res == AVERROR(EIO))
993 av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
/* Release everything that parsing with `syntax` may have allocated inside
 * `data`: string fields, binary buffers and, recursively, the contents of
 * nested structs and list elements followed by the list array itself. */
997 static void ebml_free(EbmlSyntax *syntax, void *data)
1000 for (i=0; syntax[i].id; i++) {
1001 void *data_off = (char *)data + syntax[i].data_offset;
1002 switch (syntax[i].type) {
1004 case EBML_UTF8: av_freep(data_off); break;
1005 case EBML_BIN: av_freep(&((EbmlBin *)data_off)->data); break;
/* Entries with a list element size own an EbmlList: free each element's
 * contents with the sub-table, then the element array. */
1007 if (syntax[i].list_elem_size) {
1008 EbmlList *list = data_off;
1009 char *ptr = list->elem;
1010 for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
1011 ebml_free(syntax[i].def.n, ptr);
1012 av_free(list->elem);
/* Otherwise the nested struct is embedded in place; recurse into it. */
1014 ebml_free(syntax[i].def.n, data_off);
/* Probe callback: score a buffer as Matroska/WebM.
 * The buffer must start with the EBML header ID; the header length is then
 * decoded and the header bytes are searched for one of matroska_doctypes.
 * A hit scores AVPROBE_SCORE_MAX; a header without a known doctype scores
 * AVPROBE_SCORE_EXTENSION. */
1024 static int matroska_probe(AVProbeData *p)
1027 int len_mask = 0x80, size = 1, n = 1, i;
1030 if (AV_RB32(p->buf) != EBML_ID_HEADER)
1033 /* length of header */
/* Locate the length-marker bit of the size field, trying up to 8 bytes. */
1035 while (size <= 8 && !(total & len_mask)) {
/* Mask the marker bit away, then append the remaining size bytes, which
 * follow the 4-byte ID in the buffer. */
1041 total &= (len_mask - 1);
1043 total = (total << 8) | p->buf[4 + n++];
1045 /* Does the probe data contain the whole header? */
1046 if (p->buf_size < 4 + size + total)
1049 /* The header should contain a known document type. For now,
1050 * we don't parse the whole header but simply check for the
1051 * availability of that array of characters inside the header.
1052 * Not fully fool-proof, but good enough. */
1053 for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) {
1054 int probelen = strlen(matroska_doctypes[i]);
/* A header shorter than the doctype string cannot contain it. */
1055 if (total < probelen)
/* Compare at every offset inside the header payload. */
1057 for (n = 4+size; n <= 4+size+total-probelen; n++)
1058 if (!memcmp(p->buf+n, matroska_doctypes[i], probelen))
1059 return AVPROBE_SCORE_MAX;
1062 // probably valid EBML header but no recognized doctype
1063 return AVPROBE_SCORE_EXTENSION;
/* Find the parsed track whose `num` field equals the requested track
 * number by a linear scan of matroska->tracks. An error is logged when no
 * track matches. */
1066 static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
1069 MatroskaTrack *tracks = matroska->tracks.elem;
1072 for (i=0; i < matroska->tracks.nb_elem; i++)
1073 if (tracks[i].num == num)
1076 av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
/* Undo the content encoding of one buffer, using the first entry of the
 * track's encodings list (encodings[0]).
 * On input *buf / *buf_size describe the encoded data; *buf_size is
 * updated with the decoded size. Supported algorithms are header
 * stripping, LZO, zlib and bzlib; any other value is rejected with
 * AVERROR_INVALIDDATA. */
1080 static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
1081 MatroskaTrack *track)
1083 MatroskaTrackEncoding *encodings = track->encodings.elem;
1084 uint8_t* data = *buf;
1085 int isize = *buf_size;
1086 uint8_t* pkt_data = NULL;
1087 uint8_t av_unused *newpktdata;
1088 int pkt_size = isize;
/* Refuse inputs of 10000000 bytes or more; the unsigned comparison also
 * catches a negative size. */
1092 if (pkt_size >= 10000000U)
1093 return AVERROR_INVALIDDATA;
1095 switch (encodings[0].compression.algo) {
/* Header stripping: the stored settings blob is the stripped header, which
 * is put back in front of the payload. */
1096 case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP: {
1097 int header_size = encodings[0].compression.settings.size;
1098 uint8_t *header = encodings[0].compression.settings.data;
1100 if (header_size && !header) {
1101 av_log(NULL, AV_LOG_ERROR, "Compression size but no data in headerstrip\n");
1108 pkt_size = isize + header_size;
1109 pkt_data = av_malloc(pkt_size);
1111 return AVERROR(ENOMEM);
1113 memcpy(pkt_data, header, header_size);
1114 memcpy(pkt_data + header_size, data, isize);
/* LZO: triple the output buffer and decode again for as long as the
 * decoder reports AV_LZO_OUTPUT_FULL and the buffer is below 10000000. */
1118 case MATROSKA_TRACK_ENCODING_COMP_LZO:
1120 olen = pkt_size *= 3;
1121 newpktdata = av_realloc(pkt_data, pkt_size + AV_LZO_OUTPUT_PADDING);
1123 result = AVERROR(ENOMEM);
1126 pkt_data = newpktdata;
1127 result = av_lzo1x_decode(pkt_data, &olen, data, &isize);
1128 } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
1130 result = AVERROR_INVALIDDATA;
/* zlib: inflate into a buffer that is reallocated to pkt_size each round,
 * continuing from total_out, while inflate() returns Z_OK and the buffer
 * is below 10000000 bytes. (The statement that grows pkt_size is not
 * visible here.) Anything but Z_STREAM_END at the end is an error. */
1137 case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
1138 z_stream zstream = {0};
1139 if (inflateInit(&zstream) != Z_OK)
1141 zstream.next_in = data;
1142 zstream.avail_in = isize;
1145 newpktdata = av_realloc(pkt_data, pkt_size);
1147 inflateEnd(&zstream);
1150 pkt_data = newpktdata;
1151 zstream.avail_out = pkt_size - zstream.total_out;
1152 zstream.next_out = pkt_data + zstream.total_out;
1154 result = inflate(&zstream, Z_NO_FLUSH);
1156 result = Z_MEM_ERROR;
1157 } while (result==Z_OK && pkt_size<10000000);
1158 pkt_size = zstream.total_out;
1159 inflateEnd(&zstream);
1160 if (result != Z_STREAM_END) {
1161 if (result == Z_MEM_ERROR)
1162 result = AVERROR(ENOMEM);
1164 result = AVERROR_INVALIDDATA;
/* bzlib: same structure as the zlib case, using total_out_lo32 as the
 * running output count and BZ_STREAM_END as the success condition. */
1171 case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
1172 bz_stream bzstream = {0};
1173 if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
1175 bzstream.next_in = data;
1176 bzstream.avail_in = isize;
1179 newpktdata = av_realloc(pkt_data, pkt_size);
1181 BZ2_bzDecompressEnd(&bzstream);
1184 pkt_data = newpktdata;
1185 bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
1186 bzstream.next_out = pkt_data + bzstream.total_out_lo32;
1188 result = BZ2_bzDecompress(&bzstream);
1190 result = BZ_MEM_ERROR;
1191 } while (result==BZ_OK && pkt_size<10000000);
1192 pkt_size = bzstream.total_out_lo32;
1193 BZ2_bzDecompressEnd(&bzstream);
1194 if (result != BZ_STREAM_END) {
1195 if (result == BZ_MEM_ERROR)
1196 result = AVERROR(ENOMEM);
1198 result = AVERROR_INVALIDDATA;
1205 return AVERROR_INVALIDDATA;
/* Report the decoded size back to the caller. */
1209 *buf_size = pkt_size;
/* Rewrite a subtitle packet into a complete "Dialogue:" line.
 * The payload is scanned past its first two commas, start and end times
 * are computed from pkt->pts and display_duration, and a new buffer
 * holding "Dialogue: <layer>,<start>,<end>,<rest>\r\n" replaces the
 * packet's data. */
1217 static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
1218 AVPacket *pkt, uint64_t display_duration)
1221 char *layer, *ptr = pkt->data, *end = ptr+pkt->size;
/* Advance to the first comma.
 * NOTE(review): *ptr is read before the ptr<end-1 bound is tested. */
1222 for (; *ptr!=',' && ptr<end-1; ptr++);
/* Advance to the next comma (same loop shape as above). */
1226 for (; *ptr!=',' && ptr<end-1; ptr++);
1228 int64_t end_pts = pkt->pts + display_duration;
/* Start/end times in hundredths of a second, assuming time_scale is the
 * number of nanoseconds per timestamp tick - TODO confirm. */
1229 int sc = matroska->time_scale * pkt->pts / 10000000;
1230 int ec = matroska->time_scale * end_pts / 10000000;
1231 int sh, sm, ss, eh, em, es, len;
/* Split each time into hours, minutes, seconds; sc/ec keep the remaining
 * hundredths. */
1232 sh = sc/360000; sc -= 360000*sh;
1233 sm = sc/ 6000; sc -= 6000*sm;
1234 ss = sc/ 100; sc -= 100*ss;
1235 eh = ec/360000; ec -= 360000*eh;
1236 em = ec/ 6000; ec -= 6000*em;
1237 es = ec/ 100; ec -= 100*es;
/* Room for the fixed prefix and timestamps (50), the remaining text and
 * the usual input padding. */
1239 len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
1240 if (!(line = av_buffer_alloc(len)))
1242 snprintf(line->data, len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
1243 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
/* Release the old payload buffer and point the packet at the new line. */
1244 av_buffer_unref(&pkt->buf);
1246 pkt->data = line->data;
1247 pkt->size = strlen(line->data);
/**
 * Append the payload of packet in to the end of packet out.
 *
 * out is enlarged by in->size bytes with av_grow_packet() and the data of in
 * is then copied into the newly added tail.
 *
 * NOTE(review): the handling of a failing av_grow_packet(), the release of
 * in and the return value are not visible in this listing; confirm them
 * against the full source.
 *
 * @param out packet that receives the data
 * @param in  packet whose data is appended
 */
1251 static int matroska_merge_packets(AVPacket *out, AVPacket *in)
1253 int ret = av_grow_packet(out, in->size);
/* The destination offset out->size - in->size assumes av_grow_packet() has
 * already added in->size to out->size, i.e. it is the previous end of out. */
1257 memcpy(out->data + out->size - in->size, in->data, in->size);
/**
 * Copy a list of Matroska tags into an AVDictionary.
 *
 * The dictionary key is the tag name, or "prefix/name" when prefix is not
 * NULL. Tags without a name are skipped with a warning. Tags that are flagged
 * as default or that carry no usable language are stored under the plain key.
 * A second entry keyed "key-lang" is also written; the condition guarding it
 * is not visible in this listing (presumably lang != NULL, confirm). Nested
 * tags are converted recursively with the current key as their prefix. After
 * the loop the dictionary is passed to ff_metadata_conv() with
 * ff_mkv_metadata_conv.
 *
 * @param s        context used for logging
 * @param list     list whose elements are MatroskaTag entries
 * @param metadata dictionary that receives the entries
 * @param prefix   key prefix used for nested tags, or NULL
 */
1265 static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
1266 AVDictionary **metadata, char *prefix)
1268 MatroskaTag *tags = list->elem;
1272 for (i=0; i < list->nb_elem; i++) {
/* A language of "und" is treated like a missing language. */
1273 const char *lang= (tags[i].lang && strcmp(tags[i].lang, "und")) ? tags[i].lang : NULL;
1275 if (!tags[i].name) {
1276 av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
/* Build the key; both helpers are bounded by sizeof(key). */
1279 if (prefix) snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
1280 else av_strlcpy(key, tags[i].name, sizeof(key));
1281 if (tags[i].def || !lang) {
1282 av_dict_set(metadata, key, tags[i].string, 0);
1283 if (tags[i].sub.nb_elem)
1284 matroska_convert_tag(s, &tags[i].sub, metadata, key);
/* Language specific variant: the key gets a "-lang" suffix. */
1287 av_strlcat(key, "-", sizeof(key));
1288 av_strlcat(key, lang, sizeof(key));
1289 av_dict_set(metadata, key, tags[i].string, 0);
1290 if (tags[i].sub.nb_elem)
1291 matroska_convert_tag(s, &tags[i].sub, metadata, key);
1294 ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
/**
 * Attach all parsed Matroska tags to the objects they target.
 *
 * For every entry in matroska->tags the target is chosen in this order:
 * attachment UID, chapter UID, track UID. The tag list is converted into the
 * metadata of every attachment stream, chapter or track stream whose uid
 * matches and which has actually been created. Tags with none of these UIDs
 * go to the global metadata s->metadata, with target.type as key prefix.
 *
 * @param s format context whose priv_data is the MatroskaDemuxContext
 */
1297 static void matroska_convert_tags(AVFormatContext *s)
1299 MatroskaDemuxContext *matroska = s->priv_data;
1300 MatroskaTags *tags = matroska->tags.elem;
1303 for (i=0; i < matroska->tags.nb_elem; i++) {
/* Tag targets an attachment. */
1304 if (tags[i].target.attachuid) {
1305 MatroskaAttachement *attachment = matroska->attachments.elem;
1306 for (j=0; j<matroska->attachments.nb_elem; j++)
1307 if (attachment[j].uid == tags[i].target.attachuid
1308 && attachment[j].stream)
1309 matroska_convert_tag(s, &tags[i].tag,
1310 &attachment[j].stream->metadata, NULL);
/* Tag targets a chapter. */
1311 } else if (tags[i].target.chapteruid) {
1312 MatroskaChapter *chapter = matroska->chapters.elem;
1313 for (j=0; j<matroska->chapters.nb_elem; j++)
1314 if (chapter[j].uid == tags[i].target.chapteruid
1315 && chapter[j].chapter)
1316 matroska_convert_tag(s, &tags[i].tag,
1317 &chapter[j].chapter->metadata, NULL);
/* Tag targets a track. */
1318 } else if (tags[i].target.trackuid) {
1319 MatroskaTrack *track = matroska->tracks.elem;
1320 for (j=0; j<matroska->tracks.nb_elem; j++)
1321 if (track[j].uid == tags[i].target.trackuid && track[j].stream)
1322 matroska_convert_tag(s, &tags[i].tag,
1323 &track[j].stream->metadata, NULL);
/* No specific target: store as global metadata. */
1325 matroska_convert_tag(s, &tags[i].tag, &s->metadata,
1326 tags[i].target.type);
/**
 * Parse the element referenced by one SeekHead entry.
 *
 * The I/O position, level_up and current_id are sampled on entry and written
 * back at the end of the visible code, so the caller can carry on parsing
 * where it left off. Entries that are out of range, or whose id is another
 * SeekHead or a Cluster, are rejected by the first check.
 *
 * @param matroska demuxer context
 * @param idx      index into matroska->seekhead
 * @return presumably the result of ebml_parse(), or AVERROR_INVALIDDATA when
 *         the maximum EBML depth is reached; the return statements are not
 *         visible in this listing, confirm against the full source
 */
1331 static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, int idx)
1333 EbmlList *seekhead_list = &matroska->seekhead;
1334 MatroskaSeekhead *seekhead = seekhead_list->elem;
1335 uint32_t level_up = matroska->level_up;
1336 int64_t before_pos = avio_tell(matroska->ctx->pb);
1337 uint32_t saved_id = matroska->current_id;
1338 MatroskaLevel level;
1342 if (idx >= seekhead_list->nb_elem
1343 || seekhead[idx].id == MATROSKA_ID_SEEKHEAD
1344 || seekhead[idx].id == MATROSKA_ID_CLUSTER)
/* SeekHead positions are stored relative to segment_start. */
1348 offset = seekhead[idx].pos + matroska->segment_start;
1349 if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
1350 /* We don't want to lose our seekhead level, so we add
1351 * a dummy. This is a crude hack. */
1352 if (matroska->num_levels == EBML_MAX_DEPTH) {
1353 av_log(matroska->ctx, AV_LOG_INFO,
1354 "Max EBML element depth (%d) reached, "
1355 "cannot parse further.\n", EBML_MAX_DEPTH);
1356 ret = AVERROR_INVALIDDATA;
/* A length of (uint64_t)-1 identifies the dummy level when unwinding below. */
1359 level.length = (uint64_t)-1;
1360 matroska->levels[matroska->num_levels] = level;
1361 matroska->num_levels++;
1362 matroska->current_id = 0;
1364 ret = ebml_parse(matroska, matroska_segment, matroska);
1366 /* remove dummy level */
1367 while (matroska->num_levels) {
1368 uint64_t length = matroska->levels[--matroska->num_levels].length;
1369 if (length == (uint64_t)-1)
/* Restore the reader state saved at function entry. */
1375 avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
1376 matroska->level_up = level_up;
1377 matroska->current_id = saved_id;
/**
 * Walk the SeekHead and parse the elements it points to.
 *
 * Nothing is parsed when the input is not seekable or AVFMT_FLAG_IGNIDX is
 * set (presumably an early return; the statement is not visible here). Cues
 * are not parsed here: cues_parsing_deferred is set to 1 instead. When an
 * entry fails to parse, cues_parsing_deferred is set to -1.
 *
 * @param matroska demuxer context
 */
1382 static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
1384 EbmlList *seekhead_list = &matroska->seekhead;
1385 int64_t before_pos = avio_tell(matroska->ctx->pb);
1388 // we should not do any seeking in the streaming case
1389 if (!matroska->ctx->pb->seekable ||
1390 (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
1393 for (i = 0; i < seekhead_list->nb_elem; i++) {
/* The list pointer is re-read on every iteration. */
1394 MatroskaSeekhead *seekhead = seekhead_list->elem;
/* Entries at or before the position sampled on entry are presumably skipped;
 * the statement controlled by this test is not visible here. */
1395 if (seekhead[i].pos <= before_pos)
1398 // defer cues parsing until we actually need cue data.
1399 if (seekhead[i].id == MATROSKA_ID_CUES) {
1400 matroska->cues_parsing_deferred = 1;
1404 if (matroska_parse_seekhead_entry(matroska, i) < 0) {
1405 // mark index as broken
1406 matroska->cues_parsing_deferred = -1;
/**
 * Turn the parsed cue list (matroska->index) into AVStream index entries.
 *
 * For every cue point and every track position inside it, an index entry is
 * added to the stream of the referenced track, provided the track exists and
 * has a stream. Positions are offset by segment_start.
 *
 * @param matroska demuxer context
 */
1412 static void matroska_add_index_entries(MatroskaDemuxContext *matroska) {
1413 EbmlList *index_list;
1414 MatroskaIndex *index;
1415 int index_scale = 1;
1418 index_list = &matroska->index;
1419 index = index_list->elem;
/* Heuristic: when the first cue time exceeds 1E14 / time_scale the index is
 * considered broken and all cue times are divided by time_scale below
 * (presumably the times were written unscaled; confirm). */
1420 if (index_list->nb_elem
1421 && index[0].time > 1E14/matroska->time_scale) {
1422 av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
1423 index_scale = matroska->time_scale;
1425 for (i = 0; i < index_list->nb_elem; i++) {
1426 EbmlList *pos_list = &index[i].pos;
1427 MatroskaIndexPos *pos = pos_list->elem;
1428 for (j = 0; j < pos_list->nb_elem; j++) {
1429 MatroskaTrack *track = matroska_find_track_by_num(matroska, pos[j].track);
1430 if (track && track->stream)
1431 av_add_index_entry(track->stream,
1432 pos[j].pos + matroska->segment_start,
1433 index[i].time/index_scale, 0, 0,
/**
 * Parse the Cues element whose parsing was deferred and build the index.
 *
 * The SeekHead is searched for an entry with id MATROSKA_ID_CUES, that entry
 * is parsed, and the resulting cue list is converted to index entries. A
 * parse failure sets cues_parsing_deferred to -1.
 *
 * @param matroska demuxer context
 */
1439 static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
1440 EbmlList *seekhead_list = &matroska->seekhead;
1441 MatroskaSeekhead *seekhead = seekhead_list->elem;
/* Locate the Cues entry; the loop presumably stops at the first match (the
 * statement controlled by the if is not visible here). */
1444 for (i = 0; i < seekhead_list->nb_elem; i++)
1445 if (seekhead[i].id == MATROSKA_ID_CUES)
1447 av_assert1(i <= seekhead_list->nb_elem);
/* An index equal to nb_elem is rejected inside matroska_parse_seekhead_entry(). */
1449 if (matroska_parse_seekhead_entry(matroska, i) < 0)
1450 matroska->cues_parsing_deferred = -1;
1451 matroska_add_index_entries(matroska);
/**
 * Derive the AAC profile number from a Matroska codec id string.
 *
 * The codec id is searched for the substrings "MAIN", "LC" and "SSR", in that
 * order, and the first one found decides the result.
 *
 * @param codec_id NUL-terminated Matroska codec id, must not be NULL
 * @return 1 for "MAIN", 2 for "LC", 3 for "SSR", 4 when none of them occurs
 */
static int matroska_aac_profile(const char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    /* Signed element count, so the loop compares like with like. */
    const int nb_profiles = sizeof(aac_profiles) / sizeof(aac_profiles[0]);
    int profile;

    for (profile = 0; profile < nb_profiles; profile++)
        if (strstr(codec_id, aac_profiles[profile]))
            break;
    /* Profile numbers are 1-based; no match yields nb_profiles + 1. */
    return profile + 1;
}
/**
 * Look up a sample rate in avpriv_mpeg4audio_sample_rates.
 *
 * @param samplerate sample rate to search for
 * @return presumably the index of the first matching table entry, or the
 *         number of table entries when there is no match; the return
 *         statement is not visible in this listing, confirm
 */
1465 static int matroska_aac_sri(int samplerate)
1469 for (sri=0; sri<FF_ARRAY_ELEMS(avpriv_mpeg4audio_sample_rates); sri++)
1470 if (avpriv_mpeg4audio_sample_rates[sri] == samplerate)
/**
 * Store the file creation date as a "creation_time" metadata entry.
 *
 * The value is formatted as "%Y-%m-%d %H:%M:%S" from the broken-down time
 * returned by gmtime().
 *
 * @param metadata dictionary that receives the entry
 * @param date_utc date in nanoseconds relative to 2001-01-01, as implied by
 *                 the conversion below
 */
1475 static void matroska_metadata_creation_time(AVDictionary **metadata, int64_t date_utc)
1478 /* Convert to seconds and adjust by number of seconds between 2001-01-01 and Epoch */
1479 time_t creation_time = date_utc / 1000000000 + 978307200;
/* NOTE(review): gmtime() may return NULL and uses static storage; a NULL
 * check is not visible in this listing, confirm against the full source. */
1480 struct tm *ptm = gmtime(&creation_time);
1482 strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", ptm);
1483 av_dict_set(metadata, "creation_time", buffer, 0);
/**
 * Read the file header and set up streams, attachments, chapters and tags.
 *
 * Visible steps, in order:
 *  - parse and validate the EBML header (version, sizes, doctype);
 *  - parse the segment, resyncing when needed, and execute the SeekHead;
 *  - export duration, title and creation time;
 *  - create one AVStream per video, audio or subtitle track, including codec
 *    id mapping and codec specific extradata fix-ups;
 *  - expose attachments as streams of type AVMEDIA_TYPE_ATTACHMENT;
 *  - register chapters, add index entries and convert tags.
 *
 * @param s format context whose priv_data is the MatroskaDemuxContext
 * @return a negative AVERROR code on the visible failure paths
 *         (AVERROR_PATCHWELCOME, AVERROR_INVALIDDATA, AVERROR(ENOMEM));
 *         the success return is not visible in this listing
 */
1486 static int matroska_read_header(AVFormatContext *s)
1488 MatroskaDemuxContext *matroska = s->priv_data;
1489 EbmlList *attachements_list = &matroska->attachments;
1490 MatroskaAttachement *attachements;
1491 EbmlList *chapters_list = &matroska->chapters;
1492 MatroskaChapter *chapters;
1493 MatroskaTrack *tracks;
1494 uint64_t max_start = 0;
1502 /* First read the EBML header. */
1503 if (ebml_parse(matroska, ebml_syntax, &ebml)
1504 || ebml.version > EBML_VERSION || ebml.max_size > sizeof(uint64_t)
1505 || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 3 || !ebml.doctype) {
1506 av_log(matroska->ctx, AV_LOG_ERROR,
1507 "EBML header using unsupported features\n"
1508 "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
1509 ebml.version, ebml.doctype, ebml.doctype_version);
1510 ebml_free(ebml_syntax, &ebml);
1511 return AVERROR_PATCHWELCOME;
/* Doctype version 3 is accepted, but only with a warning. */
1512 } else if (ebml.doctype_version == 3) {
1513 av_log(matroska->ctx, AV_LOG_WARNING,
1514 "EBML header using unsupported features\n"
1515 "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
1516 ebml.version, ebml.doctype, ebml.doctype_version);
/* An unknown doctype is fatal only when AV_EF_EXPLODE is set. */
1518 for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
1519 if (!strcmp(ebml.doctype, matroska_doctypes[i]))
1521 if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
1522 av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
1523 if (matroska->ctx->error_recognition & AV_EF_EXPLODE) {
1524 ebml_free(ebml_syntax, &ebml);
1525 return AVERROR_INVALIDDATA;
1528 ebml_free(ebml_syntax, &ebml);
1530 /* The next thing is a segment. */
1531 pos = avio_tell(matroska->ctx->pb);
1532 res = ebml_parse(matroska, matroska_segments, matroska);
1533 // try resyncing until we find a EBML_STOP type element.
1535 res = matroska_resync(matroska, pos);
1538 pos = avio_tell(matroska->ctx->pb);
1539 res = ebml_parse(matroska, matroska_segment, matroska);
1541 matroska_execute_seekhead(matroska);
/* Fall back to a time scale of 1000000 when the file did not set one. */
1543 if (!matroska->time_scale)
1544 matroska->time_scale = 1000000;
1545 if (matroska->duration)
1546 matroska->ctx->duration = matroska->duration * matroska->time_scale
1547 * 1000 / AV_TIME_BASE;
1548 av_dict_set(&s->metadata, "title", matroska->title, 0);
/* The date is only used when it is exactly 8 bytes long (read big-endian). */
1550 if (matroska->date_utc.size == 8)
1551 matroska_metadata_creation_time(&s->metadata, AV_RB64(matroska->date_utc.data));
/* Per-track setup: one AVStream is created for each usable track. */
1553 tracks = matroska->tracks.elem;
1554 for (i=0; i < matroska->tracks.nb_elem; i++) {
1555 MatroskaTrack *track = &tracks[i];
1556 enum AVCodecID codec_id = AV_CODEC_ID_NONE;
1557 EbmlList *encodings_list = &track->encodings;
1558 MatroskaTrackEncoding *encodings = encodings_list->elem;
1559 uint8_t *extradata = NULL;
1560 int extradata_size = 0;
1561 int extradata_offset = 0;
1562 uint32_t fourcc = 0;
1564 char* key_id_base64 = NULL;
1566 /* Apply some sanity checks. */
1567 if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
1568 track->type != MATROSKA_TRACK_TYPE_AUDIO &&
1569 track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
1570 av_log(matroska->ctx, AV_LOG_INFO,
1571 "Unknown or unsupported track type %"PRIu64"\n",
1575 if (track->codec_id == NULL)
/* Fill in defaults that the track header left unset. */
1578 if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1579 if (!track->default_duration && track->video.frame_rate > 0)
1580 track->default_duration = 1000000000/track->video.frame_rate;
/* A display size of -1 means unset; the pixel size is used instead. */
1581 if (track->video.display_width == -1)
1582 track->video.display_width = track->video.pixel_width;
1583 if (track->video.display_height == -1)
1584 track->video.display_height = track->video.pixel_height;
1585 if (track->video.color_space.size == 4)
1586 fourcc = AV_RL32(track->video.color_space.data);
1587 } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1588 if (!track->audio.out_samplerate)
1589 track->audio.out_samplerate = track->audio.samplerate;
/* Content encodings: at most one encoding per track is handled. */
1591 if (encodings_list->nb_elem > 1) {
1592 av_log(matroska->ctx, AV_LOG_ERROR,
1593 "Multiple combined encodings not supported");
1594 } else if (encodings_list->nb_elem == 1) {
1595 if (encodings[0].type) {
1596 if (encodings[0].encryption.key_id.size > 0) {
1597 /* Save the encryption key id to be stored later as a
1599 const int b64_size = AV_BASE64_SIZE(encodings[0].encryption.key_id.size);
1600 key_id_base64 = av_malloc(b64_size);
1601 if (key_id_base64 == NULL)
1602 return AVERROR(ENOMEM);
1604 av_base64_encode(key_id_base64, b64_size,
1605 encodings[0].encryption.key_id.data,
1606 encodings[0].encryption.key_id.size);
1608 encodings[0].scope = 0;
1609 av_log(matroska->ctx, AV_LOG_ERROR,
1610 "Unsupported encoding type");
1614 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
1617 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
1620 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO &&
1622 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP) {
1623 encodings[0].scope = 0;
1624 av_log(matroska->ctx, AV_LOG_ERROR,
1625 "Unsupported encoding type");
1626 } else if (track->codec_priv.size && encodings[0].scope&2) {
1627 uint8_t *codec_priv = track->codec_priv.data;
1628 int ret = matroska_decode_buffer(&track->codec_priv.data,
1629 &track->codec_priv.size,
1632 track->codec_priv.data = NULL;
1633 track->codec_priv.size = 0;
1634 av_log(matroska->ctx, AV_LOG_ERROR,
1635 "Failed to decode codec private data\n");
1638 if (codec_priv != track->codec_priv.data)
1639 av_free(codec_priv);
1643 for(j=0; ff_mkv_codec_tags[j].id != AV_CODEC_ID_NONE; j++){
1644 if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
1645 strlen(ff_mkv_codec_tags[j].str))){
1646 codec_id= ff_mkv_codec_tags[j].id;
1651 st = track->stream = avformat_new_stream(s, NULL);
1653 av_free(key_id_base64);
1654 return AVERROR(ENOMEM);
1657 if (key_id_base64) {
1658 /* export encryption key id as base64 metadata tag */
1659 av_dict_set(&st->metadata, "enc_key_id", key_id_base64, 0);
1660 av_freep(&key_id_base64);
/* Codec specific fix-ups. Depending on the codec this refines codec_id,
 * synthesises extradata, or sets extradata_offset so that a leading part of
 * the codec private data is skipped when it is copied further down. */
1663 if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
1664 && track->codec_priv.size >= 40
1665 && track->codec_priv.data != NULL) {
1666 track->ms_compat = 1;
1667 fourcc = AV_RL32(track->codec_priv.data + 16);
1668 codec_id = ff_codec_get_id(ff_codec_bmp_tags, fourcc);
1669 extradata_offset = 40;
1670 } else if (!strcmp(track->codec_id, "A_MS/ACM")
1671 && track->codec_priv.size >= 14
1672 && track->codec_priv.data != NULL) {
1674 ffio_init_context(&b, track->codec_priv.data, track->codec_priv.size,
1675 0, NULL, NULL, NULL, NULL);
1676 ret = ff_get_wav_header(&b, st->codec, track->codec_priv.size);
1679 codec_id = st->codec->codec_id;
1680 extradata_offset = FFMIN(track->codec_priv.size, 18);
1681 } else if (!strcmp(track->codec_id, "V_QUICKTIME")
1682 && (track->codec_priv.size >= 86)
1683 && (track->codec_priv.data != NULL)) {
1684 fourcc = AV_RL32(track->codec_priv.data);
1685 codec_id = ff_codec_get_id(ff_codec_movvideo_tags, fourcc);
1686 } else if (codec_id == AV_CODEC_ID_ALAC && track->codec_priv.size && track->codec_priv.size < INT_MAX - 12 - FF_INPUT_BUFFER_PADDING_SIZE) {
1687 /* Only ALAC's magic cookie is stored in Matroska's track headers.
1688 Create the "atom size", "tag", and "tag version" fields the
1689 decoder expects manually. */
1690 extradata_size = 12 + track->codec_priv.size;
1691 extradata = av_mallocz(extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
1692 if (extradata == NULL)
1693 return AVERROR(ENOMEM);
1694 AV_WB32(extradata, extradata_size);
1695 memcpy(&extradata[4], "alac", 4);
1696 AV_WB32(&extradata[8], 0);
1697 memcpy(&extradata[12], track->codec_priv.data, track->codec_priv.size);
/* PCM: the codec tag only gives the endianness; the bit depth picks the
 * concrete sample format. */
1698 } else if (codec_id == AV_CODEC_ID_PCM_S16BE) {
1699 switch (track->audio.bitdepth) {
1700 case 8: codec_id = AV_CODEC_ID_PCM_U8; break;
1701 case 24: codec_id = AV_CODEC_ID_PCM_S24BE; break;
1702 case 32: codec_id = AV_CODEC_ID_PCM_S32BE; break;
1704 } else if (codec_id == AV_CODEC_ID_PCM_S16LE) {
1705 switch (track->audio.bitdepth) {
1706 case 8: codec_id = AV_CODEC_ID_PCM_U8; break;
1707 case 24: codec_id = AV_CODEC_ID_PCM_S24LE; break;
1708 case 32: codec_id = AV_CODEC_ID_PCM_S32LE; break;
1710 } else if (codec_id==AV_CODEC_ID_PCM_F32LE && track->audio.bitdepth==64) {
1711 codec_id = AV_CODEC_ID_PCM_F64LE;
/* AAC without codec private data: extradata is built from the profile named
 * in the codec id, the sample rate index and the channel count. */
1712 } else if (codec_id == AV_CODEC_ID_AAC && !track->codec_priv.size) {
1713 int profile = matroska_aac_profile(track->codec_id);
1714 int sri = matroska_aac_sri(track->audio.samplerate);
1715 extradata = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
1716 if (extradata == NULL)
1717 return AVERROR(ENOMEM);
1718 extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
1719 extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
/* An "SBR" codec id adds three more bytes that carry the index of the output
 * sample rate. */
1720 if (strstr(track->codec_id, "SBR")) {
1721 sri = matroska_aac_sri(track->audio.out_samplerate);
1722 extradata[2] = 0x56;
1723 extradata[3] = 0xE5;
1724 extradata[4] = 0x80 | (sri<<3);
/* TTA: a 30 byte header starting with "TTA1" is written into extradata. */
1728 } else if (codec_id == AV_CODEC_ID_TTA) {
1729 extradata_size = 30;
1730 extradata = av_mallocz(extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
1731 if (extradata == NULL)
1732 return AVERROR(ENOMEM);
1733 ffio_init_context(&b, extradata, extradata_size, 1,
1734 NULL, NULL, NULL, NULL);
1735 avio_write(&b, "TTA1", 4);
1737 avio_wl16(&b, track->audio.channels);
1738 avio_wl16(&b, track->audio.bitdepth);
1739 avio_wl32(&b, track->audio.out_samplerate);
1740 avio_wl32(&b, matroska->ctx->duration * track->audio.out_samplerate);
1741 } else if (codec_id == AV_CODEC_ID_RV10 || codec_id == AV_CODEC_ID_RV20 ||
1742 codec_id == AV_CODEC_ID_RV30 || codec_id == AV_CODEC_ID_RV40) {
1743 extradata_offset = 26;
1744 } else if (codec_id == AV_CODEC_ID_RA_144) {
1745 track->audio.out_samplerate = 8000;
1746 track->audio.channels = 1;
/* RealAudio style codecs: the interleaving parameters are read from the
 * codec private data and a reassembly buffer is allocated. */
1747 } else if ((codec_id == AV_CODEC_ID_RA_288 || codec_id == AV_CODEC_ID_COOK ||
1748 codec_id == AV_CODEC_ID_ATRAC3 || codec_id == AV_CODEC_ID_SIPR)
1749 && track->codec_priv.data) {
1752 ffio_init_context(&b, track->codec_priv.data,track->codec_priv.size,
1753 0, NULL, NULL, NULL, NULL);
1755 flavor = avio_rb16(&b);
1756 track->audio.coded_framesize = avio_rb32(&b);
1758 track->audio.sub_packet_h = avio_rb16(&b);
1759 track->audio.frame_size = avio_rb16(&b);
1760 track->audio.sub_packet_size = avio_rb16(&b);
/* NOTE(review): no check of this av_malloc() result is visible in this
 * listing; confirm against the full source. */
1761 track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
1762 if (codec_id == AV_CODEC_ID_RA_288) {
1763 st->codec->block_align = track->audio.coded_framesize;
1764 track->codec_priv.size = 0;
1766 if (codec_id == AV_CODEC_ID_SIPR && flavor < 4) {
1767 const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 };
1768 track->audio.sub_packet_size = ff_sipr_subpk_size[flavor];
1769 st->codec->bit_rate = sipr_bit_rate[flavor];
1771 st->codec->block_align = track->audio.sub_packet_size;
1772 extradata_offset = 78;
/* From here on codec_priv.size counts only the bytes after the skipped
 * prefix; a non-positive result is filtered out by the size check before the
 * copy below. */
1775 track->codec_priv.size -= extradata_offset;
1777 if (codec_id == AV_CODEC_ID_NONE)
1778 av_log(matroska->ctx, AV_LOG_INFO,
1779 "Unknown/unsupported AVCodecID %s.\n", track->codec_id);
/* Track time scales below 0.01 are replaced by 1.0. */
1781 if (track->time_scale < 0.01)
1782 track->time_scale = 1.0;
1783 avpriv_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */
1785 st->codec->codec_id = codec_id;
1787 if (strcmp(track->language, "und"))
1788 av_dict_set(&st->metadata, "language", track->language, 0);
1789 av_dict_set(&st->metadata, "title", track->name, 0);
1791 if (track->flag_default)
1792 st->disposition |= AV_DISPOSITION_DEFAULT;
1793 if (track->flag_forced)
1794 st->disposition |= AV_DISPOSITION_FORCED;
/* Extradata: synthesised data from above is used when present (the guarding
 * condition is not visible here); otherwise the remaining codec private data
 * is copied. */
1796 if (!st->codec->extradata) {
1798 st->codec->extradata = extradata;
1799 st->codec->extradata_size = extradata_size;
1800 } else if(track->codec_priv.data && track->codec_priv.size > 0){
1801 st->codec->extradata = av_mallocz(track->codec_priv.size +
1802 FF_INPUT_BUFFER_PADDING_SIZE);
1803 if(st->codec->extradata == NULL)
1804 return AVERROR(ENOMEM);
1805 st->codec->extradata_size = track->codec_priv.size;
1806 memcpy(st->codec->extradata,
1807 track->codec_priv.data + extradata_offset,
1808 track->codec_priv.size);
/* Type specific stream parameters. */
1812 if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1813 MatroskaTrackPlane *planes = track->operation.combine_planes.elem;
1815 st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
1816 st->codec->codec_tag = fourcc;
1817 st->codec->width = track->video.pixel_width;
1818 st->codec->height = track->video.pixel_height;
/* Sample aspect ratio = (height * display_width) / (width * display_height). */
1819 av_reduce(&st->sample_aspect_ratio.num,
1820 &st->sample_aspect_ratio.den,
1821 st->codec->height * track->video.display_width,
1822 st->codec-> width * track->video.display_height,
1824 st->need_parsing = AVSTREAM_PARSE_HEADERS;
/* default_duration is in nanoseconds per frame, hence 1000000000 / duration. */
1825 if (track->default_duration) {
1826 av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den,
1827 1000000000, track->default_duration, 30000);
1828 #if FF_API_R_FRAME_RATE
1829 st->r_frame_rate = st->avg_frame_rate;
1833 /* export stereo mode flag as metadata tag */
1834 if (track->video.stereo_mode && track->video.stereo_mode < MATROSKA_VIDEO_STEREO_MODE_COUNT)
1835 av_dict_set(&st->metadata, "stereo_mode", ff_matroska_video_stereo_mode[track->video.stereo_mode], 0);
1837 /* export alpha mode flag as metadata tag */
1838 if (track->video.alpha_mode)
1839 av_dict_set(&st->metadata, "alpha_mode", "1", 0);
1841 /* if we have virtual track, mark the real tracks */
1842 for (j=0; j < track->operation.combine_planes.nb_elem; j++) {
1844 if (planes[j].type >= MATROSKA_VIDEO_STEREO_PLANE_COUNT)
1846 snprintf(buf, sizeof(buf), "%s_%d",
1847 ff_matroska_video_stereo_plane[planes[j].type], i);
1848 for (k=0; k < matroska->tracks.nb_elem; k++)
1849 if (planes[j].uid == tracks[k].uid) {
/* NOTE(review): k is a track index but is used to index s->streams; the two
 * only agree when every earlier track produced a stream. Confirm. */
1850 av_dict_set(&s->streams[k]->metadata,
1851 "stereo_mode", buf, 0);
1855 } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1856 st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
1857 st->codec->sample_rate = track->audio.out_samplerate;
1858 st->codec->channels = track->audio.channels;
1859 st->codec->bits_per_coded_sample = track->audio.bitdepth;
1860 if (st->codec->codec_id != AV_CODEC_ID_AAC)
1861 st->need_parsing = AVSTREAM_PARSE_HEADERS;
1862 } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
1863 st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
1865 if (st->codec->codec_id == AV_CODEC_ID_SSA ||
1866 st->codec->codec_id == AV_CODEC_ID_ASS)
1868 if (st->codec->codec_id == AV_CODEC_ID_ASS)
1870 matroska->contains_ssa = 1;
/* Attachments: every complete attachment becomes a stream of type
 * AVMEDIA_TYPE_ATTACHMENT whose extradata holds a copy of the payload. */
1874 attachements = attachements_list->elem;
1875 for (j=0; j<attachements_list->nb_elem; j++) {
1876 if (!(attachements[j].filename && attachements[j].mime &&
1877 attachements[j].bin.data && attachements[j].bin.size > 0)) {
1878 av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
1880 AVStream *st = avformat_new_stream(s, NULL);
1883 av_dict_set(&st->metadata, "filename",attachements[j].filename, 0);
1884 av_dict_set(&st->metadata, "mimetype", attachements[j].mime, 0);
1885 st->codec->codec_id = AV_CODEC_ID_NONE;
1886 st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
1887 st->codec->extradata = av_malloc(attachements[j].bin.size + FF_INPUT_BUFFER_PADDING_SIZE);
1888 if(st->codec->extradata == NULL)
1890 st->codec->extradata_size = attachements[j].bin.size;
1891 memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);
/* The codec id is chosen by prefix match of the MIME type against
 * ff_mkv_mime_tags. */
1893 for (i=0; ff_mkv_mime_tags[i].id != AV_CODEC_ID_NONE; i++) {
1894 if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
1895 strlen(ff_mkv_mime_tags[i].str))) {
1896 st->codec->codec_id = ff_mkv_mime_tags[i].id;
1900 attachements[j].stream = st;
/* Chapters: a chapter is registered only when it has a start time and a uid,
 * and its start is larger than the largest start registered so far (or
 * max_start is still 0). */
1904 chapters = chapters_list->elem;
1905 for (i=0; i<chapters_list->nb_elem; i++)
1906 if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid
1907 && (max_start==0 || chapters[i].start > max_start)) {
1908 chapters[i].chapter =
1909 avpriv_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
1910 chapters[i].start, chapters[i].end,
/* NOTE(review): the result of avpriv_new_chapter() is dereferenced here
 * without a visible NULL check; confirm against the full source. */
1912 av_dict_set(&chapters[i].chapter->metadata,
1913 "title", chapters[i].title, 0);
1914 max_start = chapters[i].start;
1917 matroska_add_index_entries(matroska);
1919 matroska_convert_tags(s);
1925 * Put one packet in an application-supplied AVPacket struct.
1926 * Returns 0 on success or -1 on failure.
/*
 * Hand the oldest queued packet to the caller.
 *
 * When the queue is not empty, the first queued AVPacket structure is copied
 * into pkt and its queue node is freed. With more than one packet queued the
 * remaining pointers are moved down by one slot and the array is shrunk; the
 * other visible path frees the array and clears prev_pkt (presumably the
 * branch taken when this was the last packet; the else is not visible here).
 * The return statements are not visible in this listing.
 */
1928 static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
1931 if (matroska->num_packets > 0) {
/* Only the structure is copied; the queue node itself is released. */
1932 memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
1933 av_free(matroska->packets[0]);
1934 if (matroska->num_packets > 1) {
1936 memmove(&matroska->packets[0], &matroska->packets[1],
1937 (matroska->num_packets - 1) * sizeof(AVPacket *));
1938 newpackets = av_realloc(matroska->packets,
1939 (matroska->num_packets - 1) * sizeof(AVPacket *));
/* The new pointer is presumably adopted only when the realloc succeeded;
 * the guarding test is not visible here. */
1941 matroska->packets = newpackets;
1943 av_freep(&matroska->packets);
1944 matroska->prev_pkt = NULL;
1946 matroska->num_packets--;
1954 * Free all packets in our internal queue.
/*
 * Drop every packet that is still queued: each packet is released with
 * av_free_packet(), its node is freed, and finally the pointer array itself
 * is freed and num_packets reset to 0. prev_pkt is cleared first.
 */
1956 static void matroska_clear_queue(MatroskaDemuxContext *matroska)
1958 matroska->prev_pkt = NULL;
1959 if (matroska->packets) {
1961 for (n = 0; n < matroska->num_packets; n++) {
/* Release the packet contents first, then the node that held them. */
1962 av_free_packet(matroska->packets[n]);
1963 av_free(matroska->packets[n]);
1965 av_freep(&matroska->packets);
1966 matroska->num_packets = 0;
/**
 * Decode the lacing header of a block into an array of frame sizes.
 *
 * Lacing types handled by the visible cases: 0x1 Xiph, 0x2 fixed-size and
 * 0x3 EBML. The first visible branch allocates a single entry holding the
 * whole size (presumably the no-lacing case; its condition is not visible).
 *
 * @param matroska demuxer context, used for logging and EBML number parsing
 * @param buf      in/out pointer to the block data (presumably advanced past
 *                 the lacing header; the write-back is not visible here)
 * @param buf_size in/out number of bytes available at *buf
 * @param type     lacing type
 * @param lace_buf receives the allocated array of lace sizes
 * @param laces    receives or holds the number of laces
 * @return AVERROR(ENOMEM) or AVERROR_INVALIDDATA on the visible error paths
 */
1970 static int matroska_parse_laces(MatroskaDemuxContext *matroska, uint8_t **buf,
1971 int* buf_size, int type,
1972 uint32_t **lace_buf, int *laces)
1974 int res = 0, n, size = *buf_size;
1975 uint8_t *data = *buf;
1976 uint32_t *lace_size;
1980 *lace_buf = av_mallocz(sizeof(int));
1982 return AVERROR(ENOMEM);
/* Parsed as *(lace_buf[0]); same element as (*lace_buf)[0]. */
1984 *lace_buf[0] = size;
1988 av_assert0(size > 0);
1992 lace_size = av_mallocz(*laces * sizeof(int));
1994 return AVERROR(ENOMEM);
1997 case 0x1: /* Xiph lacing */ {
/* All laces but the last have an explicit size; the last one takes whatever
 * remains after the sum of the others. */
2000 for (n = 0; res == 0 && n < *laces - 1; n++) {
2002 if (size <= total) {
2003 res = AVERROR_INVALIDDATA;
2008 lace_size[n] += temp;
2015 if (size <= total) {
2016 res = AVERROR_INVALIDDATA;
2020 lace_size[n] = size - total;
2024 case 0x2: /* fixed-size lacing */
/* The payload must divide evenly among the laces. */
2025 if (size % (*laces)) {
2026 res = AVERROR_INVALIDDATA;
2029 for (n = 0; n < *laces; n++)
2030 lace_size[n] = size / *laces;
2033 case 0x3: /* EBML lacing */ {
/* The first size is an unsigned EBML number; each following size is stored
 * as a signed difference to the previous one. */
2036 n = matroska_ebmlnum_uint(matroska, data, size, &num);
2037 if (n < 0 || num > INT_MAX) {
2038 av_log(matroska->ctx, AV_LOG_INFO,
2039 "EBML block data error\n");
2040 res = n<0 ? n : AVERROR_INVALIDDATA;
2045 total = lace_size[0] = num;
2046 for (n = 1; res == 0 && n < *laces - 1; n++) {
2049 r = matroska_ebmlnum_sint(matroska, data, size, &snum);
2050 if (r < 0 || lace_size[n - 1] + snum > (uint64_t)INT_MAX) {
2051 av_log(matroska->ctx, AV_LOG_INFO,
2052 "EBML block data error\n");
2053 res = r<0 ? r : AVERROR_INVALIDDATA;
2058 lace_size[n] = lace_size[n - 1] + snum;
2059 total += lace_size[n];
2061 if (size <= total) {
2062 res = AVERROR_INVALIDDATA;
2065 lace_size[*laces - 1] = size - total;
2071 *lace_buf = lace_size;
/**
 * Reassemble RealMedia style interleaved audio and queue the result.
 *
 * Incoming data is copied into track->audio.buf at a codec dependent
 * position. Once sub_packet_h sub-packets have been collected, h*w/a packets
 * of block_align bytes each are cut from the buffer and appended to the
 * demuxer packet queue. Only the first packet cut from a buffer carries the
 * buffered timecode; the following ones get AV_NOPTS_VALUE.
 *
 * Local shorthands: a = block_align, sps = sub_packet_size,
 * cfs = coded_framesize, h = sub_packet_h, y = sub_packet_cnt, w = frame_size.
 *
 * @return AVERROR_INVALIDDATA for undersized input, AVERROR(ENOMEM) when a
 *         packet cannot be allocated; the success return is not visible here
 */
2077 static int matroska_parse_rm_audio(MatroskaDemuxContext *matroska,
2078 MatroskaTrack *track,
2080 uint8_t *data, int size,
2084 int a = st->codec->block_align;
2085 int sps = track->audio.sub_packet_size;
2086 int cfs = track->audio.coded_framesize;
2087 int h = track->audio.sub_packet_h;
2088 int y = track->audio.sub_packet_cnt;
2089 int w = track->audio.frame_size;
/* New input is only accepted while no packets are pending from the buffer. */
2092 if (!track->audio.pkt_cnt) {
/* The timecode of the first sub-packet stamps the whole buffer. */
2093 if (track->audio.sub_packet_cnt == 0)
2094 track->audio.buf_timecode = timecode;
2095 if (st->codec->codec_id == AV_CODEC_ID_RA_288) {
2096 if (size < cfs * h / 2) {
2097 av_log(matroska->ctx, AV_LOG_ERROR,
2098 "Corrupt int4 RM-style audio packet size\n");
2099 return AVERROR_INVALIDDATA;
2101 for (x=0; x<h/2; x++)
2102 memcpy(track->audio.buf+x*2*w+y*cfs,
2104 } else if (st->codec->codec_id == AV_CODEC_ID_SIPR) {
2106 av_log(matroska->ctx, AV_LOG_ERROR,
2107 "Corrupt sipr RM-style audio packet size\n");
2108 return AVERROR_INVALIDDATA;
2110 memcpy(track->audio.buf + y*w, data, w);
2112 if (size < sps * w / sps || h<=0) {
2113 av_log(matroska->ctx, AV_LOG_ERROR,
2114 "Corrupt generic RM-style audio packet size\n");
2115 return AVERROR_INVALIDDATA;
/* Generic case: w/sps chunks of sps bytes are scattered over the buffer. */
2117 for (x=0; x<w/sps; x++)
2118 memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);
/* Buffer complete: schedule h*w/a output packets. */
2121 if (++track->audio.sub_packet_cnt >= h) {
2122 if (st->codec->codec_id == AV_CODEC_ID_SIPR)
2123 ff_rm_reorder_sipr_data(track->audio.buf, h, w);
2124 track->audio.sub_packet_cnt = 0;
2125 track->audio.pkt_cnt = h*w / a;
/* Emit the pending packets in buffer order. */
2129 while (track->audio.pkt_cnt) {
2130 AVPacket *pkt = NULL;
2131 if (!(pkt = av_mallocz(sizeof(AVPacket))) || av_new_packet(pkt, a) < 0){
2133 return AVERROR(ENOMEM);
/* The source offset advances by a bytes as pkt_cnt counts down. */
2135 memcpy(pkt->data, track->audio.buf
2136 + a * (h*w / a - track->audio.pkt_cnt--), a);
2137 pkt->pts = track->audio.buf_timecode;
2138 track->audio.buf_timecode = AV_NOPTS_VALUE;
2140 pkt->stream_index = st->index;
2141 dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
/**
 * Wrap one frame of block data into an AVPacket and queue it.
 *
 * Visible steps: optionally decode the payload with matroska_decode_buffer(),
 * allocate a packet of pkt_size + offset bytes, prepend a header for ProRes,
 * copy the payload, attach block additions as side data of type
 * AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, set timestamps and duration, rewrite
 * SSA payloads with matroska_fix_ass_packet(), and finally either merge the
 * packet into the previous one (same pts, same stream, SSA codec) or append
 * it to the packet queue and remember it as prev_pkt.
 *
 * @param data            frame payload
 * @param pkt_size        payload size in bytes
 * @param timecode        timestamp stored as dts for ms_compat tracks and as
 *                        pts on the other visible path
 * @param lace_duration   duration of this frame
 * @param is_keyframe     value stored in pkt->flags
 * @param additional      block additional data, used when additional_size > 0
 * @param additional_id   id written in front of the side data (8 bytes)
 * @param additional_size size of additional in bytes
 * @return AVERROR(ENOMEM) on the visible failure paths; other returns are
 *         not visible in this listing
 */
2146 static int matroska_parse_frame(MatroskaDemuxContext *matroska,
2147 MatroskaTrack *track,
2149 uint8_t *data, int pkt_size,
2150 uint64_t timecode, uint64_t lace_duration,
2151 int64_t pos, int is_keyframe,
2152 uint8_t *additional, uint64_t additional_id, int additional_size)
2154 MatroskaTrackEncoding *encodings = track->encodings.elem;
2155 uint8_t *pkt_data = data;
2156 int offset = 0, res;
/* The payload is decoded only when the encoding type is 0 and bit 0 of its
 * scope is set. */
2159 if (encodings && !encodings->type && encodings->scope & 1) {
2160 res = matroska_decode_buffer(&pkt_data, &pkt_size, track);
/* ProRes presumably reserves offset bytes for the header written below; the
 * assignment is not visible here. */
2165 if (st->codec->codec_id == AV_CODEC_ID_PRORES)
2168 pkt = av_mallocz(sizeof(AVPacket));
2169 /* XXX: prevent data copy... */
2170 if (av_new_packet(pkt, pkt_size + offset) < 0) {
2172 return AVERROR(ENOMEM);
/* ProRes header: big-endian pkt_size followed by the tag "icpf". */
2175 if (st->codec->codec_id == AV_CODEC_ID_PRORES) {
2176 uint8_t *buf = pkt->data;
2177 bytestream_put_be32(&buf, pkt_size);
2178 bytestream_put_be32(&buf, MKBETAG('i', 'c', 'p', 'f'));
2181 memcpy(pkt->data + offset, pkt_data, pkt_size);
/* pkt_data differs from data only when the decode step produced a new
 * buffer. */
2183 if (pkt_data != data)
2186 pkt->flags = is_keyframe;
2187 pkt->stream_index = st->index;
/* Side data layout: 8 byte big-endian id, then the additional payload. */
2189 if (additional_size > 0) {
2190 uint8_t *side_data = av_packet_new_side_data(pkt,
2191 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
2192 additional_size + 8);
2193 if(side_data == NULL) {
2194 av_free_packet(pkt);
2196 return AVERROR(ENOMEM);
2198 AV_WB64(side_data, additional_id);
2199 memcpy(side_data + 8, additional, additional_size);
2202 if (track->ms_compat)
2203 pkt->dts = timecode;
2205 pkt->pts = timecode;
2207 if (st->codec->codec_id == AV_CODEC_ID_SUBRIP) {
2209 * For backward compatibility.
2210 * Historically, we have put subtitle duration
2211 * in convergence_duration, on the off chance
2212 * that the time_scale is less than 1us, which
2213 * could result in a 32bit overflow on the
2214 * normal duration field.
2216 pkt->convergence_duration = lace_duration;
2219 if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE ||
2220 lace_duration <= INT_MAX) {
2222 * For non subtitle tracks, just store the duration
2225 * If it's a subtitle track and duration value does
2226 * not overflow a uint32, then also store it normally.
2228 pkt->duration = lace_duration;
2232 if (st->codec->codec_id == AV_CODEC_ID_SSA)
2233 matroska_fix_ass_packet(matroska, pkt, lace_duration);
2235 if (matroska->prev_pkt &&
2236 timecode != AV_NOPTS_VALUE &&
2237 matroska->prev_pkt->pts == timecode &&
2238 matroska->prev_pkt->stream_index == st->index &&
2239 st->codec->codec_id == AV_CODEC_ID_SSA)
2240 matroska_merge_packets(matroska->prev_pkt, pkt);
2242 dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
2243 matroska->prev_pkt = pkt;
2246 dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
2247 matroska->prev_pkt = pkt;
/**
 * Parse one Block or SimpleBlock and queue the frames it contains.
 *
 * Visible steps: read the track number, look up the track, read the block
 * timecode and flags, derive the absolute timecode from cluster_time, apply
 * the skip-to-keyframe logic used after seeking, split the payload into laces
 * and pass each lace either to matroska_parse_rm_audio() or to
 * matroska_parse_frame().
 *
 * @param data            block data, starting with the track number
 * @param size            number of bytes at data
 * @param pos             passed on to matroska_parse_frame()
 * @param cluster_time    cluster timecode, (uint64_t)-1 when unknown
 * @param block_duration  duration of the whole block, 0 when unknown
 * @param is_keyframe     -1 to take the keyframe flag from the block flags
 * @param additional      block additional data, passed on to the frame parser
 * @param additional_id   id of the additional data
 * @param additional_size size of the additional data
 * @param cluster_pos     position used for the index entry added here
 * @return AVERROR_INVALIDDATA for an unknown track; other returns are not
 *         visible in this listing
 */
2253 static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
2254 int size, int64_t pos, uint64_t cluster_time,
2255 uint64_t block_duration, int is_keyframe,
2256 uint8_t *additional, uint64_t additional_id, int additional_size,
2257 int64_t cluster_pos)
2259 uint64_t timecode = AV_NOPTS_VALUE;
2260 MatroskaTrack *track;
2264 uint32_t *lace_size = NULL;
2265 int n, flags, laces = 0;
/* The block starts with the track number as an EBML variable length integer. */
2268 if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
2269 av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
2275 track = matroska_find_track_by_num(matroska, num);
2276 if (!track || !track->stream) {
2277 av_log(matroska->ctx, AV_LOG_INFO,
2278 "Invalid stream %"PRIu64" or size %u\n", num, size);
2279 return AVERROR_INVALIDDATA;
2280 } else if (size <= 3)
2283 if (st->discard >= AVDISCARD_ALL)
2285 av_assert1(block_duration != AV_NOPTS_VALUE);
/* 16 bit big-endian timecode; presumably relative to the cluster and followed
 * by the flags byte (the flags read is not visible here). */
2287 block_time = AV_RB16(data);
/* Bit 0x80 of flags marks a keyframe when the caller did not decide. */
2291 if (is_keyframe == -1)
2292 is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0;
/* The absolute timecode is only computed when the cluster time is known and
 * the sum cannot become negative. */
2294 if (cluster_time != (uint64_t)-1
2295 && (block_time >= 0 || cluster_time >= -block_time)) {
2296 timecode = cluster_time + block_time;
2297 if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE
2298 && timecode < track->end_timecode)
2299 is_keyframe = 0; /* overlapping subtitles are not key frame */
2301 av_add_index_entry(st, cluster_pos, timecode, 0,0,AVINDEX_KEYFRAME);
/* After a seek: blocks of non-subtitle tracks are examined until the target
 * timecode and a keyframe are reached. */
2304 if (matroska->skip_to_keyframe && track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
2305 if (timecode < matroska->skip_to_timecode)
2307 if (!st->skip_to_keyframe) {
2308 av_log(matroska->ctx, AV_LOG_ERROR, "File is broken, keyframes not correctly marked!\n");
2309 matroska->skip_to_keyframe = 0;
2312 matroska->skip_to_keyframe = 0;
/* Bits 0x06 of flags select the lacing type. */
2315 res = matroska_parse_laces(matroska, &data, &size, (flags & 0x06) >> 1,
2316 &lace_size, &laces);
/* Without an explicit duration, use default_duration for every lace,
 * converted with the segment time scale. */
2321 if (!block_duration)
2322 block_duration = track->default_duration * laces / matroska->time_scale;
2324 if (cluster_time != (uint64_t)-1 && (block_time >= 0 || cluster_time >= -block_time))
2325 track->end_timecode =
2326 FFMAX(track->end_timecode, timecode + block_duration);
2328 for (n = 0; n < laces; n++) {
/* Difference of two rounded-down prefix sums, so the per-lace durations add
 * up to block_duration exactly. */
2329 int64_t lace_duration = block_duration*(n+1) / laces - block_duration*n / laces;
2331 if (lace_size[n] > size) {
2332 av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n");
/* RealAudio style codecs with interleaving parameters set go through the
 * reassembly path. */
2336 if ((st->codec->codec_id == AV_CODEC_ID_RA_288 ||
2337 st->codec->codec_id == AV_CODEC_ID_COOK ||
2338 st->codec->codec_id == AV_CODEC_ID_SIPR ||
2339 st->codec->codec_id == AV_CODEC_ID_ATRAC3) &&
2340 st->codec->block_align && track->audio.sub_packet_size) {
2342 res = matroska_parse_rm_audio(matroska, track, st, data,
/* Only the first lace of a block keeps the keyframe flag. */
2349 res = matroska_parse_frame(matroska, track, st, data, lace_size[n],
2350 timecode, lace_duration,
2351 pos, !n? is_keyframe : 0,
2352 additional, additional_id, additional_size);
/* Advance the timecode by the lace duration; with a zero duration the
 * following laces get no timestamp. */
2357 if (timecode != AV_NOPTS_VALUE)
2358 timecode = lace_duration ? timecode + lace_duration : AV_NOPTS_VALUE;
2359 data += lace_size[n];
2360 size -= lace_size[n];
/*
 * Parse the next piece of the current cluster and process the block that
 * was appended to matroska->current_cluster.blocks, if any.
 *
 * State kept across calls lives in the demuxer context: current_cluster,
 * current_cluster_pos and current_cluster_num_blocks.
 *
 * NOTE(review): this listing has elided lines (the embedded numbering has
 * gaps); several conditions, the local declarations and the return statement
 * are not visible, so the comments below cover the visible lines only.
 */
2368 static int matroska_parse_cluster_incremental(MatroskaDemuxContext *matroska)
2370 EbmlList *blocks_list;
2371 MatroskaBlock *blocks;
2373 res = ebml_parse(matroska,
2374 matroska_cluster_incremental_parsing,
2375 &matroska->current_cluster);
/* NOTE(review): the condition selecting the following "start a new cluster"
 * sequence is on an elided line. */
/* A non-zero current_cluster_pos means a cluster was already being read:
 * close its EBML level first. */
2378 if (matroska->current_cluster_pos)
2379 ebml_level_end(matroska);
/* Drop everything held for the previous cluster and reset the bookkeeping. */
2380 ebml_free(matroska_cluster, &matroska->current_cluster);
2381 memset(&matroska->current_cluster, 0, sizeof(MatroskaCluster));
2382 matroska->current_cluster_num_blocks = 0;
2383 matroska->current_cluster_pos = avio_tell(matroska->ctx->pb);
2384 matroska->prev_pkt = NULL;
2385 /* sizeof the ID which was already read */
2386 if (matroska->current_id)
2387 matroska->current_cluster_pos -= 4;
2388 res = ebml_parse(matroska,
2389 matroska_clusters_incremental,
2390 &matroska->current_cluster);
2391 /* Try parsing the block again. */
2393 res = ebml_parse(matroska,
2394 matroska_cluster_incremental_parsing,
2395 &matroska->current_cluster);
/* The list holds more blocks than have been handled so far, i.e. the parse
 * above appended a new one. */
2399 matroska->current_cluster_num_blocks <
2400 matroska->current_cluster.blocks.nb_elem) {
2401 blocks_list = &matroska->current_cluster.blocks;
2402 blocks = blocks_list->elem;
/* Mark all blocks as handled and process only the newest (last) one. */
2404 matroska->current_cluster_num_blocks = blocks_list->nb_elem;
2405 i = blocks_list->nb_elem - 1;
2406 if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
/* non_simple blocks are keyframes when they have no reference; for the
 * others pass -1 so matroska_parse_block() reads the flag bit itself. */
2407 int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
/* Hand over the additional data only when it is non-empty. */
2408 uint8_t* additional = blocks[i].additional.size > 0 ?
2409 blocks[i].additional.data : NULL;
/* Blocks that are not non_simple get their duration forced to 0. */
2410 if (!blocks[i].non_simple)
2411 blocks[i].duration = 0;
2412 res = matroska_parse_block(matroska,
2413 blocks[i].bin.data, blocks[i].bin.size,
2415 matroska->current_cluster.timecode,
2416 blocks[i].duration, is_keyframe,
2417 additional, blocks[i].additional_id,
2418 blocks[i].additional.size,
2419 matroska->current_cluster_pos);
/* Any negative result flags the demuxer as done. */
2423 if (res < 0) matroska->done = 1;
/*
 * Parse one cluster and feed its blocks to matroska_parse_block().
 *
 * When matroska->contains_ssa is not set, the work is delegated to
 * matroska_parse_cluster_incremental(). Otherwise the whole cluster is parsed
 * into a local MatroskaCluster, every block with data is processed in order,
 * and the cluster is freed again.
 *
 * NOTE(review): some lines of this function are elided in this listing
 * (declarations of res/i/pos, the tail of the matroska_parse_block() call and
 * the return statement), so the return value is not documented here.
 */
2427 static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
2429 MatroskaCluster cluster = { 0 };
2430 EbmlList *blocks_list;
2431 MatroskaBlock *blocks;
2434 if (!matroska->contains_ssa)
2435 return matroska_parse_cluster_incremental(matroska);
/* Remember where this cluster starts in the input. */
2436 pos = avio_tell(matroska->ctx->pb);
2437 matroska->prev_pkt = NULL;
2438 if (matroska->current_id)
2439 pos -= 4; /* sizeof the ID which was already read */
2440 res = ebml_parse(matroska, matroska_clusters, &cluster);
2441 blocks_list = &cluster.blocks;
2442 blocks = blocks_list->elem;
/* Walk every parsed block, skipping those without payload. */
2443 for (i=0; i<blocks_list->nb_elem; i++)
2444 if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
/* non_simple blocks are keyframes when they have no reference; otherwise
 * -1 lets matroska_parse_block() decide from the block flags. */
2445 int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
/* No additional data is forwarded on this path (NULL, 0, 0). */
2446 res=matroska_parse_block(matroska,
2447 blocks[i].bin.data, blocks[i].bin.size,
2448 blocks[i].bin.pos, cluster.timecode,
2449 blocks[i].duration, is_keyframe, NULL, 0, 0,
/* Release everything ebml_parse() stored in the local cluster. */
2452 ebml_free(matroska_cluster, &cluster);
/*
 * Packet-read callback (installed as .read_packet in ff_matroska_demuxer).
 *
 * Keeps parsing clusters for as long as matroska_deliver_packet() returns
 * non-zero; when a cluster fails to parse, matroska_resync() is called with
 * the position recorded before the attempt.
 *
 * NOTE(review): lines are elided in this listing (among them the return
 * statements), so termination and return values are not documented here.
 */
2456 static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
2458 MatroskaDemuxContext *matroska = s->priv_data;
2460 while (matroska_deliver_packet(matroska, pkt)) {
/* Input position before the parse attempt, used as the resync origin. */
2461 int64_t pos = avio_tell(matroska->ctx->pb);
2464 if (matroska_parse_cluster(matroska) < 0)
2465 matroska_resync(matroska, pos);
/*
 * Seek callback (installed as .read_seek in ff_matroska_demuxer).
 *
 * Looks up 'timestamp' in the index of stream 'stream_index', parsing further
 * clusters when the index does not yet cover it, then repositions the input
 * and resets the per-track and skip state.
 *
 * NOTE(review): this listing has elided lines (goto/return statements, the
 * error label, the initialisation of index_min and the first line of the
 * subtitle condition). The comments below describe the visible lines only,
 * and return values are not documented.
 */
2471 static int matroska_read_seek(AVFormatContext *s, int stream_index,
2472 int64_t timestamp, int flags)
2474 MatroskaDemuxContext *matroska = s->priv_data;
2475 MatroskaTrack *tracks = matroska->tracks.elem;
2476 AVStream *st = s->streams[stream_index];
2477 int i, index, index_sub, index_min;
2479 /* Parse the CUES now since we need the index data to seek. */
2480 if (matroska->cues_parsing_deferred > 0) {
2481 matroska->cues_parsing_deferred = 0;
2482 matroska_parse_cues(matroska);
/* NOTE(review): the body of this empty-index check is on an elided line. */
2485 if (!st->nb_index_entries)
/* Never seek before the first indexed timestamp. */
2487 timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
/* Target not in the index yet: jump to the last indexed position and parse
 * clusters (which add index entries) until the lookup succeeds or a cluster
 * fails to parse. */
2489 if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
2490 avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
2491 matroska->current_id = 0;
2492 while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
2493 matroska_clear_queue(matroska);
2494 if (matroska_parse_cluster(matroska) < 0)
2499 matroska_clear_queue(matroska);
/* Besides a failed lookup, a hit on the very last index entry is also
 * rejected while cues_parsing_deferred is negative. */
2500 if (index < 0 || (matroska->cues_parsing_deferred < 0 && index == st->nb_index_entries - 1))
/* Reset per-track state; for subtitle tracks that are not fully discarded,
 * consider starting earlier so already-running subtitles can be picked up. */
2504 for (i=0; i < matroska->tracks.nb_elem; i++) {
2505 tracks[i].audio.pkt_cnt = 0;
2506 tracks[i].audio.sub_packet_cnt = 0;
2507 tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
2508 tracks[i].end_timecode = 0;
2509 if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
2510 && tracks[i].stream->discard != AVDISCARD_ALL) {
2511 index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
/* NOTE(review): index_sub was looked up in tracks[i].stream's index, yet
 * the lines below use it to index st->index_entries (the seek stream).
 * These are different index tables unless both are the same stream --
 * looks like a bug; confirm and consider
 * tracks[i].stream->index_entries[index_sub] instead. */
/* 30000000000 / time_scale: presumably 30 s in nanoseconds converted to
 * timecode units -- TODO confirm the unit of time_scale. */
2513 && st->index_entries[index_sub].pos < st->index_entries[index_min].pos
2514 && st->index_entries[index].timestamp - st->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
2515 index_min = index_sub;
/* Reposition the input at the chosen index entry. */
2519 avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
2520 matroska->current_id = 0;
/* With AVSEEK_FLAG_ANY the stream does not wait for a keyframe and the
 * requested timestamp is the skip target; the other assignments use the
 * timestamp of the index entry that was found. */
2521 if (flags & AVSEEK_FLAG_ANY) {
2522 st->skip_to_keyframe = 0;
2523 matroska->skip_to_timecode = timestamp;
2525 st->skip_to_keyframe = 1;
2526 matroska->skip_to_timecode = st->index_entries[index].timestamp;
2528 matroska->skip_to_keyframe = 1;
2530 matroska->num_levels = 0;
2531 ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
2534 // slightly hackish but allows proper fallback to
2535 // the generic seeking code.
2536 matroska_clear_queue(matroska);
2537 matroska->current_id = 0;
2538 st->skip_to_keyframe =
2539 matroska->skip_to_keyframe = 0;
2541 matroska->num_levels = 0;
/*
 * Close callback (installed as .read_close in ff_matroska_demuxer).
 *
 * Empties the packet queue, frees the audio buffer of every audio track and
 * releases the EBML-parsed data of the current cluster and of the segment.
 *
 * NOTE(review): the declaration of n and the return statement are on lines
 * elided from this listing.
 */
2545 static int matroska_read_close(AVFormatContext *s)
2547 MatroskaDemuxContext *matroska = s->priv_data;
2548 MatroskaTrack *tracks = matroska->tracks.elem;
2551 matroska_clear_queue(matroska);
/* Only audio tracks have their audio.buf released here. */
2553 for (n=0; n < matroska->tracks.nb_elem; n++)
2554 if (tracks[n].type == MATROSKA_TRACK_TYPE_AUDIO)
2555 av_free(tracks[n].audio.buf);
/* The current cluster is freed separately from the segment-level data. */
2556 ebml_free(matroska_cluster, &matroska->current_cluster);
2557 ebml_free(matroska_segment, matroska);
/*
 * Demuxer description for the "matroska,webm" input format: it names the
 * format, sizes the private context as MatroskaDemuxContext and wires up the
 * probe, header, packet, close and seek callbacks defined in this file.
 */
2562 AVInputFormat ff_matroska_demuxer = {
2563 .name = "matroska,webm",
2564 .long_name = NULL_IF_CONFIG_SMALL("Matroska / WebM"),
2565 .priv_data_size = sizeof(MatroskaDemuxContext),
2566 .read_probe = matroska_probe,
2567 .read_header = matroska_read_header,
2568 .read_packet = matroska_read_packet,
2569 .read_close = matroska_read_close,
2570 .read_seek = matroska_read_seek,