]> git.sesse.net Git - ffmpeg/blob - libavformat/matroska.c
fix -a^b which was interpreted as (-a)^b
[ffmpeg] / libavformat / matroska.c
1 /*
2  * Matroska file demuxer (no muxer yet)
3  * Copyright (c) 2003-2004 The ffmpeg Project
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file matroska.c
24  * Matroska file demuxer
25  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
26  * with a little help from Moritz Bunkus <moritz@bunkus.org>
27  * Specs available on the matroska project page:
28  * http://www.matroska.org/.
29  */
30
31 #include "avformat.h"
32 /* For codec_get_bmp_id and codec_get_wav_id. */
33 #include "riff.h"
34 #include "intfloat_readwrite.h"
35
/* Highest EBML version that is accepted when reading a header. */
#define EBML_VERSION                 1

/* Top-level master element ID. */
#define EBML_ID_HEADER               0x1A45DFA3

/* Element IDs found inside the HEADER master. */
#define EBML_ID_EBMLVERSION          0x4286
#define EBML_ID_EBMLREADVERSION      0x42F7
#define EBML_ID_EBMLMAXIDLENGTH      0x42F2
#define EBML_ID_EBMLMAXSIZELENGTH    0x42F3
#define EBML_ID_DOCTYPE              0x4282
#define EBML_ID_DOCTYPEVERSION       0x4287
#define EBML_ID_DOCTYPEREADVERSION   0x4285

/* General EBML element types. */
#define EBML_ID_VOID                 0xEC
53
/*
 * Matroska element IDs. None is wider than 32 bits.
 */

/* Top-level segment. */
#define MATROSKA_ID_SEGMENT                0x18538067

/* Top-level master IDs inside the segment. */
#define MATROSKA_ID_INFO                   0x1549A966
#define MATROSKA_ID_TRACKS                 0x1654AE6B
#define MATROSKA_ID_CUES                   0x1C53BB6B
#define MATROSKA_ID_TAGS                   0x1254C367
#define MATROSKA_ID_SEEKHEAD               0x114D9B74
#define MATROSKA_ID_CLUSTER                0x1F43B675

/* IDs in the info master. */
#define MATROSKA_ID_TIMECODESCALE          0x2AD7B1
#define MATROSKA_ID_DURATION               0x4489
#define MATROSKA_ID_WRITINGAPP             0x5741
#define MATROSKA_ID_MUXINGAPP              0x4D80
#define MATROSKA_ID_DATEUTC                0x4461

/* ID in the tracks master. */
#define MATROSKA_ID_TRACKENTRY             0xAE

/* IDs in the trackentry master. */
#define MATROSKA_ID_TRACKNUMBER            0xD7
#define MATROSKA_ID_TRACKUID               0x73C5
#define MATROSKA_ID_TRACKTYPE              0x83
#define MATROSKA_ID_TRACKAUDIO             0xE1
#define MATROSKA_ID_TRACKVIDEO             0xE0
#define MATROSKA_ID_CODECID                0x86
#define MATROSKA_ID_CODECPRIVATE           0x63A2
#define MATROSKA_ID_CODECNAME              0x258688
#define MATROSKA_ID_CODECINFOURL           0x3B4040
#define MATROSKA_ID_CODECDOWNLOADURL       0x26B240
#define MATROSKA_ID_TRACKNAME              0x536E
#define MATROSKA_ID_TRACKLANGUAGE          0x22B59C
#define MATROSKA_ID_TRACKFLAGENABLED       0xB9
#define MATROSKA_ID_TRACKFLAGDEFAULT       0x88
#define MATROSKA_ID_TRACKFLAGLACING        0x9C
#define MATROSKA_ID_TRACKMINCACHE          0x6DE7
#define MATROSKA_ID_TRACKMAXCACHE          0x6DF8
#define MATROSKA_ID_TRACKDEFAULTDURATION   0x23E383

/* IDs in the trackvideo master. */
#define MATROSKA_ID_VIDEOFRAMERATE         0x2383E3
#define MATROSKA_ID_VIDEODISPLAYWIDTH      0x54B0
#define MATROSKA_ID_VIDEODISPLAYHEIGHT     0x54BA
#define MATROSKA_ID_VIDEOPIXELWIDTH        0xB0
#define MATROSKA_ID_VIDEOPIXELHEIGHT       0xBA
#define MATROSKA_ID_VIDEOFLAGINTERLACED    0x9A
#define MATROSKA_ID_VIDEOSTEREOMODE        0x53B9
#define MATROSKA_ID_VIDEOASPECTRATIO       0x54B3
#define MATROSKA_ID_VIDEOCOLOURSPACE       0x2EB524

/* IDs in the trackaudio master. */
#define MATROSKA_ID_AUDIOSAMPLINGFREQ      0xB5
#define MATROSKA_ID_AUDIOBITDEPTH          0x6264
#define MATROSKA_ID_AUDIOCHANNELS          0x9F

/* ID in the cues master. */
#define MATROSKA_ID_POINTENTRY             0xBB

/* IDs in the pointentry master. */
#define MATROSKA_ID_CUETIME                0xB3
#define MATROSKA_ID_CUETRACKPOSITION       0xB7

/* IDs in the cuetrackposition master. */
#define MATROSKA_ID_CUETRACK               0xF7
#define MATROSKA_ID_CUECLUSTERPOSITION     0xF1

/* IDs in the tags master. */
/* TODO */

/* ID in the seekhead master. */
#define MATROSKA_ID_SEEKENTRY              0x4DBB

/* IDs in the seekpoint master. */
#define MATROSKA_ID_SEEKID                 0x53AB
#define MATROSKA_ID_SEEKPOSITION           0x53AC

/* IDs in the cluster master. */
#define MATROSKA_ID_CLUSTERTIMECODE        0xE7
#define MATROSKA_ID_BLOCKGROUP             0xA0

/* IDs in the blockgroup master. */
#define MATROSKA_ID_BLOCK                  0xA1
#define MATROSKA_ID_BLOCKDURATION          0x9B
#define MATROSKA_ID_BLOCKREFERENCE         0xFB
144
/* Track type values. */
typedef enum {
    MATROSKA_TRACK_TYPE_VIDEO    = 0x01,
    MATROSKA_TRACK_TYPE_AUDIO    = 0x02,
    MATROSKA_TRACK_TYPE_COMPLEX  = 0x03,
    MATROSKA_TRACK_TYPE_LOGO     = 0x10,
    MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
    MATROSKA_TRACK_TYPE_CONTROL  = 0x20,
} MatroskaTrackType;
153
/* Eye mode values. */
typedef enum {
    MATROSKA_EYE_MODE_MONO  = 0,
    MATROSKA_EYE_MODE_RIGHT = 1,
    MATROSKA_EYE_MODE_LEFT  = 2,
    MATROSKA_EYE_MODE_BOTH  = 3,
} MatroskaEyeMode;
160
/* Aspect ratio mode values. */
typedef enum {
    MATROSKA_ASPECT_RATIO_MODE_FREE  = 0,
    MATROSKA_ASPECT_RATIO_MODE_KEEP  = 1,
    MATROSKA_ASPECT_RATIO_MODE_FIXED = 2,
} MatroskaAspectRatioMode;
166
167 /*
168  * These aren't in any way "matroska-form" things,
169  * it's just something I use in the muxer/demuxer.
170  */
171
/*
 * Bit flags common to all tracks. MATROSKA_TRACK_SHIFT marks the first
 * bit that track-type specific flag enums build on.
 */
typedef enum {
    MATROSKA_TRACK_ENABLED = 1 << 0,
    MATROSKA_TRACK_DEFAULT = 1 << 1,
    MATROSKA_TRACK_LACING  = 1 << 2,
    MATROSKA_TRACK_SHIFT   = 1 << 16
} MatroskaTrackFlags;
178
179 typedef enum {
180   MATROSKA_VIDEOTRACK_INTERLACED = (MATROSKA_TRACK_SHIFT<<0)
181 } MatroskaVideoTrackFlags;
182
183 /*
184  * Matroska Codec IDs. Strings.
185  */
186
187 typedef struct CodecTags{
188     const char *str;
189     enum CodecID id;
190 }CodecTags;
191
/* Codec ID strings that have no entry in the lookup table. */
#define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC "V_MS/VFW/FOURCC"
#define MATROSKA_CODEC_ID_AUDIO_ACM        "A_MS/ACM"
194
195 static CodecTags codec_tags[]={
196 //    {"V_MS/VFW/FOURCC"  , CODEC_ID_NONE},
197     {"V_UNCOMPRESSED"   , CODEC_ID_RAWVIDEO},
198     {"V_MPEG4/ISO/SP"   , CODEC_ID_MPEG4},
199     {"V_MPEG4/ISO/ASP"  , CODEC_ID_MPEG4},
200     {"V_MPEG4/ISO/AP"   , CODEC_ID_MPEG4},
201     {"V_MPEG4/ISO/AVC"  , CODEC_ID_H264},
202     {"V_MPEG4/MS/V3"    , CODEC_ID_MSMPEG4V3},
203     {"V_MPEG1"          , CODEC_ID_MPEG1VIDEO},
204     {"V_MPEG2"          , CODEC_ID_MPEG2VIDEO},
205     {"V_MJPEG"          , CODEC_ID_MJPEG},
206     {"V_REAL/RV10"      , CODEC_ID_RV10},
207     {"V_REAL/RV20"      , CODEC_ID_RV20},
208     {"V_REAL/RV30"      , CODEC_ID_RV30},
209     {"V_REAL/RV40"      , CODEC_ID_RV40},
210 /* TODO: Real/Quicktime */
211
212 //    {"A_MS/ACM"         , CODEC_ID_NONE},
213     {"A_MPEG/L1"        , CODEC_ID_MP3},
214     {"A_MPEG/L2"        , CODEC_ID_MP3},
215     {"A_MPEG/L3"        , CODEC_ID_MP3},
216     {"A_PCM/INT/BIG"    , CODEC_ID_PCM_U16BE},
217     {"A_PCM/INT/LIT"    , CODEC_ID_PCM_U16LE},
218 //    {"A_PCM/FLOAT/IEEE" , CODEC_ID_NONE},
219     {"A_AC3"            , CODEC_ID_AC3},
220     {"A_DTS"            , CODEC_ID_DTS},
221     {"A_VORBIS"         , CODEC_ID_VORBIS},
222     {"A_AAC/MPEG2/"     , CODEC_ID_AAC},
223     {"A_AAC/MPEG4/"     , CODEC_ID_AAC},
224     {NULL               , CODEC_ID_NONE}
225 /* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */
226 };
227
/* Maximum nesting depth of master elements that is tracked. */
#define EBML_MAX_DEPTH  16
230
231 typedef struct Track {
232     MatroskaTrackType type;
233
234     /* Unique track number and track ID. stream_index is the index that
235      * the calling app uses for this track. */
236     uint32_t num,
237         uid,
238         stream_index;
239
240     char *name,
241         *language;
242
243     char *codec_id,
244         *codec_name;
245
246     unsigned char *codec_priv;
247     int codec_priv_size;
248
249     int64_t default_duration;
250     MatroskaTrackFlags flags;
251 } MatroskaTrack;
252
253 typedef struct MatroskaVideoTrack {
254     MatroskaTrack track;
255
256     int pixel_width,
257         pixel_height,
258         display_width,
259         display_height;
260
261     uint32_t fourcc;
262
263     MatroskaAspectRatioMode ar_mode;
264     MatroskaEyeMode eye_mode;
265
266     //..
267 } MatroskaVideoTrack;
268
269 typedef struct MatroskaAudioTrack {
270     MatroskaTrack track;
271
272     int channels,
273         bitdepth,
274         samplerate;
275     //..
276 } MatroskaAudioTrack;
277
278 typedef struct MatroskaSubtitleTrack {
279     MatroskaTrack track;
280
281     //..
282 } MatroskaSubtitleTrack;
283
/* One open master element: where its content starts and how long it is. */
typedef struct MatroskaLevel {
    uint64_t start;
    uint64_t length;
} MatroskaLevel;
287
/* One entry of the seek index. */
typedef struct MatroskaDemuxIndex {
    uint64_t pos;    /* of the corresponding *cluster*! */
    uint16_t track;  /* reference to 'num' */
    uint64_t time;   /* in nanoseconds */
} MatroskaDemuxIndex;
293
294 typedef struct MatroskaDemuxContext {
295     AVFormatContext *ctx;
296
297     /* ebml stuff */
298     int num_levels;
299     MatroskaLevel levels[EBML_MAX_DEPTH];
300     int level_up;
301
302     /* matroska stuff */
303     char *writing_app,
304         *muxing_app;
305     int64_t created;
306
307     /* timescale in the file */
308     int64_t time_scale;
309
310     /* position (time, ns) */
311     int64_t pos;
312
313     /* num_streams is the number of streams that av_new_stream() was called
314      * for ( = that are available to the calling program). */
315     int num_tracks, num_streams;
316     MatroskaTrack *tracks[MAX_STREAMS];
317
318     /* cache for ID peeking */
319     uint32_t peek_id;
320
321     /* byte position of the segment inside the stream */
322     offset_t segment_start;
323
324     /* The packet queue. */
325     AVPacket **packets;
326     int num_packets;
327
328     /* have we already parse metadata/cues/clusters? */
329     int metadata_parsed,
330         index_parsed,
331         done;
332
333     /* The index for seeking. */
334     int num_indexes;
335     MatroskaDemuxIndex *index;
336 } MatroskaDemuxContext;
337
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. Matroska is really just an
 * EBML file.
 */
343
344 /*
345  * Return: the amount of levels in the hierarchy that the
346  * current element lies higher than the previous one.
347  * The opposite isn't done - that's auto-done using master
348  * element reading.
349  */
350
351 static int
352 ebml_read_element_level_up (MatroskaDemuxContext *matroska)
353 {
354     ByteIOContext *pb = &matroska->ctx->pb;
355     offset_t pos = url_ftell(pb);
356     int num = 0;
357
358     while (matroska->num_levels > 0) {
359         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
360
361         if (pos >= level->start + level->length) {
362             matroska->num_levels--;
363             num++;
364         } else {
365             break;
366         }
367     }
368
369     return num;
370 }
371
372 /*
373  * Read: an "EBML number", which is defined as a variable-length
374  * array of bytes. The first byte indicates the length by giving a
375  * number of 0-bits followed by a one. The position of the first
376  * "one" bit inside the first byte indicates the length of this
377  * number.
378  * Returns: num. of bytes read. < 0 on error.
379  */
380
381 static int
382 ebml_read_num (MatroskaDemuxContext *matroska,
383                int                   max_size,
384                uint64_t             *number)
385 {
386     ByteIOContext *pb = &matroska->ctx->pb;
387     int len_mask = 0x80, read = 1, n = 1;
388     int64_t total = 0;
389
390     /* the first byte tells us the length in bytes - get_byte() can normally
391      * return 0, but since that's not a valid first ebmlID byte, we can
392      * use it safely here to catch EOS. */
393     if (!(total = get_byte(pb))) {
394         /* we might encounter EOS here */
395         if (!url_feof(pb)) {
396             offset_t pos = url_ftell(pb);
397             av_log(matroska->ctx, AV_LOG_ERROR,
398                    "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
399                    pos, pos);
400         }
401         return AVERROR_IO; /* EOS or actual I/O error */
402     }
403
404     /* get the length of the EBML number */
405     while (read <= max_size && !(total & len_mask)) {
406         read++;
407         len_mask >>= 1;
408     }
409     if (read > max_size) {
410         offset_t pos = url_ftell(pb) - 1;
411         av_log(matroska->ctx, AV_LOG_ERROR,
412                "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
413                (uint8_t) total, pos, pos);
414         return AVERROR_INVALIDDATA;
415     }
416
417     /* read out length */
418     total &= ~len_mask;
419     while (n++ < read)
420         total = (total << 8) | get_byte(pb);
421
422     *number = total;
423
424     return read;
425 }
426
427 /*
428  * Read: the element content data ID.
429  * Return: the number of bytes read or < 0 on error.
430  */
431
432 static int
433 ebml_read_element_id (MatroskaDemuxContext *matroska,
434                       uint32_t             *id,
435                       int                  *level_up)
436 {
437     int read;
438     uint64_t total;
439
440     /* if we re-call this, use our cached ID */
441     if (matroska->peek_id != 0) {
442         if (level_up)
443             *level_up = 0;
444         *id = matroska->peek_id;
445         return 0;
446     }
447
448     /* read out the "EBML number", include tag in ID */
449     if ((read = ebml_read_num(matroska, 4, &total)) < 0)
450         return read;
451     *id = matroska->peek_id  = total | (1 << (read * 7));
452
453     /* level tracking */
454     if (level_up)
455         *level_up = ebml_read_element_level_up(matroska);
456
457     return read;
458 }
459
460 /*
461  * Read: element content length.
462  * Return: the number of bytes read or < 0 on error.
463  */
464
465 static int
466 ebml_read_element_length (MatroskaDemuxContext *matroska,
467                           uint64_t             *length)
468 {
469     /* clear cache since we're now beyond that data point */
470     matroska->peek_id = 0;
471
472     /* read out the "EBML number", include tag in ID */
473     return ebml_read_num(matroska, 8, length);
474 }
475
476 /*
477  * Return: the ID of the next element, or 0 on error.
478  * Level_up contains the amount of levels that this
479  * next element lies higher than the previous one.
480  */
481
482 static uint32_t
483 ebml_peek_id (MatroskaDemuxContext *matroska,
484               int                  *level_up)
485 {
486     uint32_t id;
487
488     assert(level_up != NULL);
489
490     if (ebml_read_element_id(matroska, &id, level_up) < 0)
491         return 0;
492
493     return id;
494 }
495
496 /*
497  * Seek to a given offset.
498  * 0 is success, -1 is failure.
499  */
500
501 static int
502 ebml_read_seek (MatroskaDemuxContext *matroska,
503                 offset_t              offset)
504 {
505     ByteIOContext *pb = &matroska->ctx->pb;
506
507     /* clear ID cache, if any */
508     matroska->peek_id = 0;
509
510     return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
511 }
512
513 /*
514  * Skip the next element.
515  * 0 is success, -1 is failure.
516  */
517
518 static int
519 ebml_read_skip (MatroskaDemuxContext *matroska)
520 {
521     ByteIOContext *pb = &matroska->ctx->pb;
522     uint32_t id;
523     uint64_t length;
524     int res;
525
526     if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
527         (res = ebml_read_element_length(matroska, &length)) < 0)
528         return res;
529
530     url_fskip(pb, length);
531
532     return 0;
533 }
534
535 /*
536  * Read the next element as an unsigned int.
537  * 0 is success, < 0 is failure.
538  */
539
540 static int
541 ebml_read_uint (MatroskaDemuxContext *matroska,
542                 uint32_t             *id,
543                 uint64_t             *num)
544 {
545     ByteIOContext *pb = &matroska->ctx->pb;
546     int n = 0, size, res;
547     uint64_t rlength;
548
549     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
550         (res = ebml_read_element_length(matroska, &rlength)) < 0)
551         return res;
552     size = rlength;
553     if (size < 1 || size > 8) {
554         offset_t pos = url_ftell(pb);
555         av_log(matroska->ctx, AV_LOG_ERROR,
556                "Invalid uint element size %d at position %"PRId64" (0x%"PRIx64")\n",
557                 size, pos, pos);
558         return AVERROR_INVALIDDATA;
559     }
560
561     /* big-endian ordening; build up number */
562     *num = 0;
563     while (n++ < size)
564         *num = (*num << 8) | get_byte(pb);
565
566     return 0;
567 }
568
569 /*
570  * Read the next element as a signed int.
571  * 0 is success, < 0 is failure.
572  */
573
574 static int
575 ebml_read_sint (MatroskaDemuxContext *matroska,
576                 uint32_t             *id,
577                 int64_t              *num)
578 {
579     ByteIOContext *pb = &matroska->ctx->pb;
580     int size, n = 1, negative = 0, res;
581     uint64_t rlength;
582
583     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
584         (res = ebml_read_element_length(matroska, &rlength)) < 0)
585         return res;
586     size = rlength;
587     if (size < 1 || size > 8) {
588         offset_t pos = url_ftell(pb);
589         av_log(matroska->ctx, AV_LOG_ERROR,
590                "Invalid sint element size %d at position %"PRId64" (0x%"PRIx64")\n",
591                 size, pos, pos);
592         return AVERROR_INVALIDDATA;
593     }
594     if ((*num = get_byte(pb)) & 0x80) {
595         negative = 1;
596         *num &= ~0x80;
597     }
598     *num = 0;
599     while (n++ < size)
600         *num = (*num << 8) | get_byte(pb);
601
602     /* make signed */
603     if (negative)
604         *num = *num - (1LL << ((8 * size) - 1));
605
606     return 0;
607 }
608
609 /*
610  * Read the next element as a float.
611  * 0 is success, < 0 is failure.
612  */
613
614 static int
615 ebml_read_float (MatroskaDemuxContext *matroska,
616                  uint32_t             *id,
617                  double               *num)
618 {
619     ByteIOContext *pb = &matroska->ctx->pb;
620     int size, res;
621     uint64_t rlength;
622
623     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
624         (res = ebml_read_element_length(matroska, &rlength)) < 0)
625         return res;
626     size = rlength;
627
628     if (size == 4) {
629         *num= av_int2flt(get_be32(pb));
630     } else if(size==8){
631         *num= av_int2dbl(get_be64(pb));
632     } else if(size==10){
633         av_log(matroska->ctx, AV_LOG_ERROR,
634                "FIXME! 10-byte floats unimplemented\n");
635         return AVERROR_UNKNOWN;
636     } else{
637         offset_t pos = url_ftell(pb);
638         av_log(matroska->ctx, AV_LOG_ERROR,
639                "Invalid float element size %d at position %"PRIu64" (0x%"PRIx64")\n",
640                size, pos, pos);
641         return AVERROR_INVALIDDATA;
642     }
643
644     return 0;
645 }
646
647 /*
648  * Read the next element as an ASCII string.
649  * 0 is success, < 0 is failure.
650  */
651
652 static int
653 ebml_read_ascii (MatroskaDemuxContext *matroska,
654                  uint32_t             *id,
655                  char                **str)
656 {
657     ByteIOContext *pb = &matroska->ctx->pb;
658     int size, res;
659     uint64_t rlength;
660
661     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
662         (res = ebml_read_element_length(matroska, &rlength)) < 0)
663         return res;
664     size = rlength;
665
666     /* ebml strings are usually not 0-terminated, so we allocate one
667      * byte more, read the string and NULL-terminate it ourselves. */
668     if (size < 0 || !(*str = av_malloc(size + 1))) {
669         av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
670         return AVERROR_NOMEM;
671     }
672     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
673         offset_t pos = url_ftell(pb);
674         av_log(matroska->ctx, AV_LOG_ERROR,
675                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
676         return AVERROR_IO;
677     }
678     (*str)[size] = '\0';
679
680     return 0;
681 }
682
683 /*
684  * Read the next element as a UTF-8 string.
685  * 0 is success, < 0 is failure.
686  */
687
688 static int
689 ebml_read_utf8 (MatroskaDemuxContext *matroska,
690                 uint32_t             *id,
691                 char                **str)
692 {
693   return ebml_read_ascii(matroska, id, str);
694 }
695
696 /*
697  * Read the next element as a date (nanoseconds since 1/1/2000).
698  * 0 is success, < 0 is failure.
699  */
700
701 static int
702 ebml_read_date (MatroskaDemuxContext *matroska,
703                 uint32_t             *id,
704                 int64_t              *date)
705 {
706   return ebml_read_sint(matroska, id, date);
707 }
708
709 /*
710  * Read the next element, but only the header. The contents
711  * are supposed to be sub-elements which can be read separately.
712  * 0 is success, < 0 is failure.
713  */
714
715 static int
716 ebml_read_master (MatroskaDemuxContext *matroska,
717                   uint32_t             *id)
718 {
719     ByteIOContext *pb = &matroska->ctx->pb;
720     uint64_t length;
721     MatroskaLevel *level;
722     int res;
723
724     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
725         (res = ebml_read_element_length(matroska, &length)) < 0)
726         return res;
727
728     /* protect... (Heaven forbids that the '>' is true) */
729     if (matroska->num_levels >= EBML_MAX_DEPTH) {
730         av_log(matroska->ctx, AV_LOG_ERROR,
731                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
732         return AVERROR_NOTSUPP;
733     }
734
735     /* remember level */
736     level = &matroska->levels[matroska->num_levels++];
737     level->start = url_ftell(pb);
738     level->length = length;
739
740     return 0;
741 }
742
743 /*
744  * Read the next element as binary data.
745  * 0 is success, < 0 is failure.
746  */
747
748 static int
749 ebml_read_binary (MatroskaDemuxContext *matroska,
750                   uint32_t             *id,
751                   uint8_t             **binary,
752                   int                  *size)
753 {
754     ByteIOContext *pb = &matroska->ctx->pb;
755     uint64_t rlength;
756     int res;
757
758     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
759         (res = ebml_read_element_length(matroska, &rlength)) < 0)
760         return res;
761     *size = rlength;
762
763     if (!(*binary = av_malloc(*size))) {
764         av_log(matroska->ctx, AV_LOG_ERROR,
765                "Memory allocation error\n");
766         return AVERROR_NOMEM;
767     }
768
769     if (get_buffer(pb, *binary, *size) != *size) {
770         offset_t pos = url_ftell(pb);
771         av_log(matroska->ctx, AV_LOG_ERROR,
772                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
773         return AVERROR_IO;
774     }
775
776     return 0;
777 }
778
/*
 * Read an unsigned "EBML number" from a memory buffer of 'size' bytes.
 * A number whose value bits are all ones is stored as (uint64_t)-1.
 * Return: number of bytes processed, < 0 on error.
 * XXX: use ebml_read_num().
 */

static int
matroska_ebmlnum_uint (uint8_t  *data,
                       uint32_t  size,
                       uint64_t *num)
{
    int marker = 0x80, len = 1, ones = 0, i;
    uint64_t value;

    if (size == 0)
        return AVERROR_INVALIDDATA;

    /* the position of the first 1-bit in the first byte gives the length */
    value = data[0];
    for (; len <= 8 && !(value & marker); len++)
        marker >>= 1;
    if (len > 8)
        return AVERROR_INVALIDDATA;

    /* strip the marker; count the first byte if its value bits are all set */
    value &= (marker - 1);
    if (value == (uint64_t) (marker - 1))
        ones = 1;

    /* the whole number has to be inside the buffer */
    if (size < (uint32_t) len)
        return AVERROR_INVALIDDATA;

    for (i = 1; i < len; i++) {
        if (data[i] == 0xff)
            ones++;
        value = (value << 8) | data[i];
    }

    *num = (ones == len) ? (uint64_t) -1 : value;

    return len;
}
822
/*
 * Read a signed "EBML number" from a memory buffer. The number is
 * read as unsigned first and then shifted down by half of its range;
 * the all-ones value is stored as INT64_MAX.
 * Return: number of bytes processed, < 0 on error.
 */

static int
matroska_ebmlnum_sint (uint8_t  *data,
                       uint32_t  size,
                       int64_t  *num)
{
    uint64_t raw;
    int len;

    /* read as unsigned number first */
    len = matroska_ebmlnum_uint(data, size, &raw);
    if (len < 0)
        return len;

    /* make signed (weird way) */
    if (raw != (uint64_t) -1)
        *num = raw - ((1LL << ((7 * len) - 1)) - 1);
    else
        *num = INT64_MAX;

    return len;
}
847
848 /*
849  * Read an EBML header.
850  * 0 is success, < 0 is failure.
851  */
852
853 static int
854 ebml_read_header (MatroskaDemuxContext *matroska,
855                   char                **doctype,
856                   int                  *version)
857 {
858     uint32_t id;
859     int level_up, res = 0;
860
861     /* default init */
862     if (doctype)
863         *doctype = NULL;
864     if (version)
865         *version = 1;
866
867     if (!(id = ebml_peek_id(matroska, &level_up)) ||
868         level_up != 0 || id != EBML_ID_HEADER) {
869         av_log(matroska->ctx, AV_LOG_ERROR,
870                "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
871         return AVERROR_INVALIDDATA;
872     }
873     if ((res = ebml_read_master(matroska, &id)) < 0)
874         return res;
875
876     while (res == 0) {
877         if (!(id = ebml_peek_id(matroska, &level_up)))
878             return AVERROR_IO;
879
880         /* end-of-header */
881         if (level_up)
882             break;
883
884         switch (id) {
885             /* is our read version uptodate? */
886             case EBML_ID_EBMLREADVERSION: {
887                 uint64_t num;
888
889                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
890                     return res;
891                 if (num > EBML_VERSION) {
892                     av_log(matroska->ctx, AV_LOG_ERROR,
893                            "EBML version %"PRIu64" (> %d) is not supported\n",
894                            num, EBML_VERSION);
895                     return AVERROR_INVALIDDATA;
896                 }
897                 break;
898             }
899
900             /* we only handle 8 byte lengths at max */
901             case EBML_ID_EBMLMAXSIZELENGTH: {
902                 uint64_t num;
903
904                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
905                     return res;
906                 if (num > sizeof(uint64_t)) {
907                     av_log(matroska->ctx, AV_LOG_ERROR,
908                            "Integers of size %"PRIu64" (> %zd) not supported\n",
909                            num, sizeof(uint64_t));
910                     return AVERROR_INVALIDDATA;
911                 }
912                 break;
913             }
914
915             /* we handle 4 byte IDs at max */
916             case EBML_ID_EBMLMAXIDLENGTH: {
917                 uint64_t num;
918
919                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
920                     return res;
921                 if (num > sizeof(uint32_t)) {
922                     av_log(matroska->ctx, AV_LOG_ERROR,
923                            "IDs of size %"PRIu64" (> %zu) not supported\n",
924                             num, sizeof(uint32_t));
925                     return AVERROR_INVALIDDATA;
926                 }
927                 break;
928             }
929
930             case EBML_ID_DOCTYPE: {
931                 char *text;
932
933                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
934                     return res;
935                 if (doctype) {
936                     if (*doctype)
937                         av_free(*doctype);
938                     *doctype = text;
939                 } else
940                     av_free(text);
941                 break;
942             }
943
944             case EBML_ID_DOCTYPEREADVERSION: {
945                 uint64_t num;
946
947                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
948                     return res;
949                 if (version)
950                     *version = num;
951                 break;
952             }
953
954             default:
955                 av_log(matroska->ctx, AV_LOG_INFO,
956                        "Unknown data type 0x%x in EBML header", id);
957                 /* pass-through */
958
959             case EBML_ID_VOID:
960             /* we ignore these two, as they don't tell us anything we
961              * care about */
962             case EBML_ID_EBMLVERSION:
963             case EBML_ID_DOCTYPEVERSION:
964                 res = ebml_read_skip (matroska);
965                 break;
966         }
967     }
968
969     return 0;
970 }
971
972 /*
973  * Put one packet in an application-supplied AVPacket struct.
974  * Returns 0 on success or -1 on failure.
975  */
976
977 static int
978 matroska_deliver_packet (MatroskaDemuxContext *matroska,
979                          AVPacket             *pkt)
980 {
981     if (matroska->num_packets > 0) {
982         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
983         av_free(matroska->packets[0]);
984         if (matroska->num_packets > 1) {
985             memmove(&matroska->packets[0], &matroska->packets[1],
986                     (matroska->num_packets - 1) * sizeof(AVPacket *));
987             matroska->packets =
988                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
989                            sizeof(AVPacket *));
990         } else {
991             av_free(matroska->packets);
992             matroska->packets = NULL;
993         }
994         matroska->num_packets--;
995         return 0;
996     }
997
998     return -1;
999 }
1000
1001 /*
1002  * Put a packet into our internal queue. Will be delivered to the
1003  * user/application during the next get_packet() call.
1004  */
1005
1006 static void
1007 matroska_queue_packet (MatroskaDemuxContext *matroska,
1008                        AVPacket             *pkt)
1009 {
1010     matroska->packets =
1011         av_realloc(matroska->packets, (matroska->num_packets + 1) *
1012                    sizeof(AVPacket *));
1013     matroska->packets[matroska->num_packets] = pkt;
1014     matroska->num_packets++;
1015 }
1016
1017 /*
1018  * Autodetecting...
1019  */
1020
1021 static int
1022 matroska_probe (AVProbeData *p)
1023 {
1024     uint64_t total = 0;
1025     int len_mask = 0x80, size = 1, n = 1;
1026     uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1027
1028     if (p->buf_size < 5)
1029         return 0;
1030
1031     /* ebml header? */
1032     if ((p->buf[0] << 24 | p->buf[1] << 16 |
1033          p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
1034         return 0;
1035
1036     /* length of header */
1037     total = p->buf[4];
1038     while (size <= 8 && !(total & len_mask)) {
1039         size++;
1040         len_mask >>= 1;
1041     }
1042     if (size > 8)
1043       return 0;
1044     total &= (len_mask - 1);
1045     while (n < size)
1046         total = (total << 8) | p->buf[4 + n++];
1047
1048     /* does the probe data contain the whole header? */
1049     if (p->buf_size < 4 + size + total)
1050       return 0;
1051
1052     /* the header must contain the document type 'matroska'. For now,
1053      * we don't parse the whole header but simply check for the
1054      * availability of that array of characters inside the header.
1055      * Not fully fool-proof, but good enough. */
1056     for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
1057         if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
1058             return AVPROBE_SCORE_MAX;
1059
1060     return 0;
1061 }
1062
1063 /*
1064  * From here on, it's all XML-style DTD stuff... Needs no comments.
1065  */
1066
1067 static int
1068 matroska_parse_info (MatroskaDemuxContext *matroska)
1069 {
1070     int res = 0;
1071     uint32_t id;
1072
1073     av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
1074
1075     while (res == 0) {
1076         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1077             res = AVERROR_IO;
1078             break;
1079         } else if (matroska->level_up) {
1080             matroska->level_up--;
1081             break;
1082         }
1083
1084         switch (id) {
1085             /* cluster timecode */
1086             case MATROSKA_ID_TIMECODESCALE: {
1087                 uint64_t num;
1088                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1089                     break;
1090                 matroska->time_scale = num;
1091                 break;
1092             }
1093
1094             case MATROSKA_ID_DURATION: {
1095                 double num;
1096                 if ((res = ebml_read_float(matroska, &id, &num)) < 0)
1097                     break;
1098                 matroska->ctx->duration = num * matroska->time_scale * 1000 / AV_TIME_BASE;
1099                 break;
1100             }
1101
1102             case MATROSKA_ID_WRITINGAPP: {
1103                 char *text;
1104                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1105                     break;
1106                 matroska->writing_app = text;
1107                 break;
1108             }
1109
1110             case MATROSKA_ID_MUXINGAPP: {
1111                 char *text;
1112                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1113                     break;
1114                 matroska->muxing_app = text;
1115                 break;
1116             }
1117
1118             case MATROSKA_ID_DATEUTC: {
1119                 int64_t time;
1120                 if ((res = ebml_read_date(matroska, &id, &time)) < 0)
1121                     break;
1122                 matroska->created = time;
1123                 break;
1124             }
1125
1126             default:
1127                 av_log(matroska->ctx, AV_LOG_INFO,
1128                        "Unknown entry 0x%x in info header\n", id);
1129                 /* fall-through */
1130
1131             case EBML_ID_VOID:
1132                 res = ebml_read_skip(matroska);
1133                 break;
1134         }
1135
1136         if (matroska->level_up) {
1137             matroska->level_up--;
1138             break;
1139         }
1140     }
1141
1142     return res;
1143 }
1144
1145 static int
1146 matroska_add_stream (MatroskaDemuxContext *matroska)
1147 {
1148     int res = 0;
1149     uint32_t id;
1150     MatroskaTrack *track;
1151
1152     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1153
1154     /* Allocate a generic track. As soon as we know its type we'll realloc. */
1155     track = av_mallocz(sizeof(MatroskaTrack));
1156     matroska->num_tracks++;
1157
1158     /* start with the master */
1159     if ((res = ebml_read_master(matroska, &id)) < 0)
1160         return res;
1161
1162     /* try reading the trackentry headers */
1163     while (res == 0) {
1164         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1165             res = AVERROR_IO;
1166             break;
1167         } else if (matroska->level_up > 0) {
1168             matroska->level_up--;
1169             break;
1170         }
1171
1172         switch (id) {
1173             /* track number (unique stream ID) */
1174             case MATROSKA_ID_TRACKNUMBER: {
1175                 uint64_t num;
1176                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1177                     break;
1178                 track->num = num;
1179                 break;
1180             }
1181
1182             /* track UID (unique identifier) */
1183             case MATROSKA_ID_TRACKUID: {
1184                 uint64_t num;
1185                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1186                     break;
1187                 track->uid = num;
1188                 break;
1189             }
1190
1191             /* track type (video, audio, combined, subtitle, etc.) */
1192             case MATROSKA_ID_TRACKTYPE: {
1193                 uint64_t num;
1194                 if (track->type != 0) {
1195                     av_log(matroska->ctx, AV_LOG_INFO,
1196                            "More than one tracktype in an entry - skip\n");
1197                     break;
1198                 }
1199                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1200                     break;
1201                 track->type = num;
1202
1203                 /* ok, so we're actually going to reallocate this thing */
1204                 switch (track->type) {
1205                     case MATROSKA_TRACK_TYPE_VIDEO:
1206                         track = (MatroskaTrack *)
1207                             av_realloc(track, sizeof(MatroskaVideoTrack));
1208                         break;
1209                     case MATROSKA_TRACK_TYPE_AUDIO:
1210                         track = (MatroskaTrack *)
1211                             av_realloc(track, sizeof(MatroskaAudioTrack));
1212                         ((MatroskaAudioTrack *)track)->channels = 1;
1213                         ((MatroskaAudioTrack *)track)->samplerate = 8000;
1214                         break;
1215                     case MATROSKA_TRACK_TYPE_SUBTITLE:
1216                         track = (MatroskaTrack *)
1217                             av_realloc(track, sizeof(MatroskaSubtitleTrack));
1218                         break;
1219                     case MATROSKA_TRACK_TYPE_COMPLEX:
1220                     case MATROSKA_TRACK_TYPE_LOGO:
1221                     case MATROSKA_TRACK_TYPE_CONTROL:
1222                     default:
1223                         av_log(matroska->ctx, AV_LOG_INFO,
1224                                "Unknown or unsupported track type 0x%x\n",
1225                                track->type);
1226                         track->type = 0;
1227                         break;
1228                 }
1229                 matroska->tracks[matroska->num_tracks - 1] = track;
1230                 break;
1231             }
1232
1233             /* tracktype specific stuff for video */
1234             case MATROSKA_ID_TRACKVIDEO: {
1235                 MatroskaVideoTrack *videotrack;
1236                 if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1237                     av_log(matroska->ctx, AV_LOG_INFO,
1238                            "video data in non-video track - ignoring\n");
1239                     res = AVERROR_INVALIDDATA;
1240                     break;
1241                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1242                     break;
1243                 videotrack = (MatroskaVideoTrack *)track;
1244
1245                 while (res == 0) {
1246                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1247                         res = AVERROR_IO;
1248                         break;
1249                     } else if (matroska->level_up > 0) {
1250                         matroska->level_up--;
1251                         break;
1252                     }
1253
1254                     switch (id) {
1255                         /* fixme, this should be one-up, but I get it here */
1256                         case MATROSKA_ID_TRACKDEFAULTDURATION: {
1257                             uint64_t num;
1258                             if ((res = ebml_read_uint (matroska, &id,
1259                                                        &num)) < 0)
1260                                 break;
1261                             track->default_duration = num;
1262                             break;
1263                         }
1264
1265                         /* video framerate */
1266                         case MATROSKA_ID_VIDEOFRAMERATE: {
1267                             double num;
1268                             if ((res = ebml_read_float(matroska, &id,
1269                                                        &num)) < 0)
1270                                 break;
1271                             track->default_duration = 1000000000 * (1. / num);
1272                             break;
1273                         }
1274
1275                         /* width of the size to display the video at */
1276                         case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1277                             uint64_t num;
1278                             if ((res = ebml_read_uint(matroska, &id,
1279                                                       &num)) < 0)
1280                                 break;
1281                             videotrack->display_width = num;
1282                             break;
1283                         }
1284
1285                         /* height of the size to display the video at */
1286                         case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1287                             uint64_t num;
1288                             if ((res = ebml_read_uint(matroska, &id,
1289                                                       &num)) < 0)
1290                                 break;
1291                             videotrack->display_height = num;
1292                             break;
1293                         }
1294
1295                         /* width of the video in the file */
1296                         case MATROSKA_ID_VIDEOPIXELWIDTH: {
1297                             uint64_t num;
1298                             if ((res = ebml_read_uint(matroska, &id,
1299                                                       &num)) < 0)
1300                                 break;
1301                             videotrack->pixel_width = num;
1302                             break;
1303                         }
1304
1305                         /* height of the video in the file */
1306                         case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1307                             uint64_t num;
1308                             if ((res = ebml_read_uint(matroska, &id,
1309                                                       &num)) < 0)
1310                                 break;
1311                             videotrack->pixel_height = num;
1312                             break;
1313                         }
1314
1315                         /* whether the video is interlaced */
1316                         case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1317                             uint64_t num;
1318                             if ((res = ebml_read_uint(matroska, &id,
1319                                                       &num)) < 0)
1320                                 break;
1321                             if (num)
1322                                 track->flags |=
1323                                     MATROSKA_VIDEOTRACK_INTERLACED;
1324                             else
1325                                 track->flags &=
1326                                     ~MATROSKA_VIDEOTRACK_INTERLACED;
1327                             break;
1328                         }
1329
1330                         /* stereo mode (whether the video has two streams,
1331                          * where one is for the left eye and the other for
1332                          * the right eye, which creates a 3D-like
1333                          * effect) */
1334                         case MATROSKA_ID_VIDEOSTEREOMODE: {
1335                             uint64_t num;
1336                             if ((res = ebml_read_uint(matroska, &id,
1337                                                       &num)) < 0)
1338                                 break;
1339                             if (num != MATROSKA_EYE_MODE_MONO &&
1340                                 num != MATROSKA_EYE_MODE_LEFT &&
1341                                 num != MATROSKA_EYE_MODE_RIGHT &&
1342                                 num != MATROSKA_EYE_MODE_BOTH) {
1343                                 av_log(matroska->ctx, AV_LOG_INFO,
1344                                        "Ignoring unknown eye mode 0x%x\n",
1345                                        (uint32_t) num);
1346                                 break;
1347                             }
1348                             videotrack->eye_mode = num;
1349                             break;
1350                         }
1351
1352                         /* aspect ratio behaviour */
1353                         case MATROSKA_ID_VIDEOASPECTRATIO: {
1354                             uint64_t num;
1355                             if ((res = ebml_read_uint(matroska, &id,
1356                                                       &num)) < 0)
1357                                 break;
1358                             if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1359                                 num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1360                                 num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1361                                 av_log(matroska->ctx, AV_LOG_INFO,
1362                                        "Ignoring unknown aspect ratio 0x%x\n",
1363                                        (uint32_t) num);
1364                                 break;
1365                             }
1366                             videotrack->ar_mode = num;
1367                             break;
1368                         }
1369
1370                         /* colourspace (only matters for raw video)
1371                          * fourcc */
1372                         case MATROSKA_ID_VIDEOCOLOURSPACE: {
1373                             uint64_t num;
1374                             if ((res = ebml_read_uint(matroska, &id,
1375                                                       &num)) < 0)
1376                                 break;
1377                             videotrack->fourcc = num;
1378                             break;
1379                         }
1380
1381                         default:
1382                             av_log(matroska->ctx, AV_LOG_INFO,
1383                                    "Unknown video track header entry "
1384                                    "0x%x - ignoring\n", id);
1385                             /* pass-through */
1386
1387                         case EBML_ID_VOID:
1388                             res = ebml_read_skip(matroska);
1389                             break;
1390                     }
1391
1392                     if (matroska->level_up) {
1393                         matroska->level_up--;
1394                         break;
1395                     }
1396                 }
1397                 break;
1398             }
1399
1400             /* tracktype specific stuff for audio */
1401             case MATROSKA_ID_TRACKAUDIO: {
1402                 MatroskaAudioTrack *audiotrack;
1403                 if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1404                     av_log(matroska->ctx, AV_LOG_INFO,
1405                            "audio data in non-audio track - ignoring\n");
1406                     res = AVERROR_INVALIDDATA;
1407                     break;
1408                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1409                     break;
1410                 audiotrack = (MatroskaAudioTrack *)track;
1411
1412                 while (res == 0) {
1413                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1414                         res = AVERROR_IO;
1415                         break;
1416                     } else if (matroska->level_up > 0) {
1417                         matroska->level_up--;
1418                         break;
1419                     }
1420
1421                     switch (id) {
1422                         /* samplerate */
1423                         case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1424                             double num;
1425                             if ((res = ebml_read_float(matroska, &id,
1426                                                        &num)) < 0)
1427                                 break;
1428                             audiotrack->samplerate = num;
1429                             break;
1430                         }
1431
1432                             /* bitdepth */
1433                         case MATROSKA_ID_AUDIOBITDEPTH: {
1434                             uint64_t num;
1435                             if ((res = ebml_read_uint(matroska, &id,
1436                                                       &num)) < 0)
1437                                 break;
1438                             audiotrack->bitdepth = num;
1439                             break;
1440                         }
1441
1442                             /* channels */
1443                         case MATROSKA_ID_AUDIOCHANNELS: {
1444                             uint64_t num;
1445                             if ((res = ebml_read_uint(matroska, &id,
1446                                                       &num)) < 0)
1447                                 break;
1448                             audiotrack->channels = num;
1449                             break;
1450                         }
1451
1452                         default:
1453                             av_log(matroska->ctx, AV_LOG_INFO,
1454                                    "Unknown audio track header entry "
1455                                    "0x%x - ignoring\n", id);
1456                             /* pass-through */
1457
1458                         case EBML_ID_VOID:
1459                             res = ebml_read_skip(matroska);
1460                             break;
1461                     }
1462
1463                     if (matroska->level_up) {
1464                         matroska->level_up--;
1465                         break;
1466                     }
1467                 }
1468                 break;
1469             }
1470
1471                 /* codec identifier */
1472             case MATROSKA_ID_CODECID: {
1473                 char *text;
1474                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1475                     break;
1476                 track->codec_id = text;
1477                 break;
1478             }
1479
1480                 /* codec private data */
1481             case MATROSKA_ID_CODECPRIVATE: {
1482                 uint8_t *data;
1483                 int size;
1484                 if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1485                     break;
1486                 track->codec_priv = data;
1487                 track->codec_priv_size = size;
1488                 break;
1489             }
1490
1491                 /* name of the codec */
1492             case MATROSKA_ID_CODECNAME: {
1493                 char *text;
1494                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1495                     break;
1496                 track->codec_name = text;
1497                 break;
1498             }
1499
1500                 /* name of this track */
1501             case MATROSKA_ID_TRACKNAME: {
1502                 char *text;
1503                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1504                     break;
1505                 track->name = text;
1506                 break;
1507             }
1508
1509                 /* language (matters for audio/subtitles, mostly) */
1510             case MATROSKA_ID_TRACKLANGUAGE: {
1511                 char *text;
1512                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1513                     break;
1514                 track->language = text;
1515                 break;
1516             }
1517
1518                 /* whether this is actually used */
1519             case MATROSKA_ID_TRACKFLAGENABLED: {
1520                 uint64_t num;
1521                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1522                     break;
1523                 if (num)
1524                     track->flags |= MATROSKA_TRACK_ENABLED;
1525                 else
1526                     track->flags &= ~MATROSKA_TRACK_ENABLED;
1527                 break;
1528             }
1529
1530                 /* whether it's the default for this track type */
1531             case MATROSKA_ID_TRACKFLAGDEFAULT: {
1532                 uint64_t num;
1533                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1534                     break;
1535                 if (num)
1536                     track->flags |= MATROSKA_TRACK_DEFAULT;
1537                 else
1538                     track->flags &= ~MATROSKA_TRACK_DEFAULT;
1539                 break;
1540             }
1541
1542                 /* lacing (like MPEG, where blocks don't end/start on frame
1543                  * boundaries) */
1544             case MATROSKA_ID_TRACKFLAGLACING: {
1545                 uint64_t num;
1546                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1547                     break;
1548                 if (num)
1549                     track->flags |= MATROSKA_TRACK_LACING;
1550                 else
1551                     track->flags &= ~MATROSKA_TRACK_LACING;
1552                 break;
1553             }
1554
1555                 /* default length (in time) of one data block in this track */
1556             case MATROSKA_ID_TRACKDEFAULTDURATION: {
1557                 uint64_t num;
1558                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1559                     break;
1560                 track->default_duration = num;
1561                 break;
1562             }
1563
1564             default:
1565                 av_log(matroska->ctx, AV_LOG_INFO,
1566                        "Unknown track header entry 0x%x - ignoring\n", id);
1567                 /* pass-through */
1568
1569             case EBML_ID_VOID:
1570             /* we ignore these because they're nothing useful. */
1571             case MATROSKA_ID_CODECINFOURL:
1572             case MATROSKA_ID_CODECDOWNLOADURL:
1573             case MATROSKA_ID_TRACKMINCACHE:
1574             case MATROSKA_ID_TRACKMAXCACHE:
1575                 res = ebml_read_skip(matroska);
1576                 break;
1577         }
1578
1579         if (matroska->level_up) {
1580             matroska->level_up--;
1581             break;
1582         }
1583     }
1584
1585     return res;
1586 }
1587
1588 static int
1589 matroska_parse_tracks (MatroskaDemuxContext *matroska)
1590 {
1591     int res = 0;
1592     uint32_t id;
1593
1594     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1595
1596     while (res == 0) {
1597         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1598             res = AVERROR_IO;
1599             break;
1600         } else if (matroska->level_up) {
1601             matroska->level_up--;
1602             break;
1603         }
1604
1605         switch (id) {
1606             /* one track within the "all-tracks" header */
1607             case MATROSKA_ID_TRACKENTRY:
1608                 res = matroska_add_stream(matroska);
1609                 break;
1610
1611             default:
1612                 av_log(matroska->ctx, AV_LOG_INFO,
1613                        "Unknown entry 0x%x in track header\n", id);
1614                 /* fall-through */
1615
1616             case EBML_ID_VOID:
1617                 res = ebml_read_skip(matroska);
1618                 break;
1619         }
1620
1621         if (matroska->level_up) {
1622             matroska->level_up--;
1623             break;
1624         }
1625     }
1626
1627     return res;
1628 }
1629
1630 static int
1631 matroska_parse_index (MatroskaDemuxContext *matroska)
1632 {
1633     int res = 0;
1634     uint32_t id;
1635     MatroskaDemuxIndex idx;
1636
1637     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1638
1639     while (res == 0) {
1640         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1641             res = AVERROR_IO;
1642             break;
1643         } else if (matroska->level_up) {
1644             matroska->level_up--;
1645             break;
1646         }
1647
1648         switch (id) {
1649             /* one single index entry ('point') */
1650             case MATROSKA_ID_POINTENTRY:
1651                 if ((res = ebml_read_master(matroska, &id)) < 0)
1652                     break;
1653
1654                 /* in the end, we hope to fill one entry with a
1655                  * timestamp, a file position and a tracknum */
1656                 idx.pos   = (uint64_t) -1;
1657                 idx.time  = (uint64_t) -1;
1658                 idx.track = (uint16_t) -1;
1659
1660                 while (res == 0) {
1661                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1662                         res = AVERROR_IO;
1663                         break;
1664                     } else if (matroska->level_up) {
1665                         matroska->level_up--;
1666                         break;
1667                     }
1668
1669                     switch (id) {
1670                         /* one single index entry ('point') */
1671                         case MATROSKA_ID_CUETIME: {
1672                             uint64_t time;
1673                             if ((res = ebml_read_uint(matroska, &id,
1674                                                       &time)) < 0)
1675                                 break;
1676                             idx.time = time * matroska->time_scale;
1677                             break;
1678                         }
1679
1680                         /* position in the file + track to which it
1681                          * belongs */
1682                         case MATROSKA_ID_CUETRACKPOSITION:
1683                             if ((res = ebml_read_master(matroska, &id)) < 0)
1684                                 break;
1685
1686                             while (res == 0) {
1687                                 if (!(id = ebml_peek_id (matroska,
1688                                                     &matroska->level_up))) {
1689                                     res = AVERROR_IO;
1690                                     break;
1691                                 } else if (matroska->level_up) {
1692                                     matroska->level_up--;
1693                                     break;
1694                                 }
1695
1696                                 switch (id) {
1697                                     /* track number */
1698                                     case MATROSKA_ID_CUETRACK: {
1699                                         uint64_t num;
1700                                         if ((res = ebml_read_uint(matroska,
1701                                                           &id, &num)) < 0)
1702                                             break;
1703                                         idx.track = num;
1704                                         break;
1705                                     }
1706
1707                                         /* position in file */
1708                                     case MATROSKA_ID_CUECLUSTERPOSITION: {
1709                                         uint64_t num;
1710                                         if ((res = ebml_read_uint(matroska,
1711                                                           &id, &num)) < 0)
1712                                             break;
1713                                         idx.pos = num;
1714                                         break;
1715                                     }
1716
1717                                     default:
1718                                         av_log(matroska->ctx, AV_LOG_INFO,
1719                                                "Unknown entry 0x%x in "
1720                                                "CuesTrackPositions\n", id);
1721                                         /* fall-through */
1722
1723                                     case EBML_ID_VOID:
1724                                         res = ebml_read_skip(matroska);
1725                                         break;
1726                                 }
1727
1728                                 if (matroska->level_up) {
1729                                     matroska->level_up--;
1730                                     break;
1731                                 }
1732                             }
1733
1734                             break;
1735
1736                         default:
1737                             av_log(matroska->ctx, AV_LOG_INFO,
1738                                    "Unknown entry 0x%x in cuespoint "
1739                                    "index\n", id);
1740                             /* fall-through */
1741
1742                         case EBML_ID_VOID:
1743                             res = ebml_read_skip(matroska);
1744                             break;
1745                     }
1746
1747                     if (matroska->level_up) {
1748                         matroska->level_up--;
1749                         break;
1750                     }
1751                 }
1752
1753                 /* so let's see if we got what we wanted */
1754                 if (idx.pos   != (uint64_t) -1 &&
1755                     idx.time  != (uint64_t) -1 &&
1756                     idx.track != (uint16_t) -1) {
1757                     if (matroska->num_indexes % 32 == 0) {
1758                         /* re-allocate bigger index */
1759                         matroska->index =
1760                             av_realloc(matroska->index,
1761                                        (matroska->num_indexes + 32) *
1762                                        sizeof(MatroskaDemuxIndex));
1763                     }
1764                     matroska->index[matroska->num_indexes] = idx;
1765                     matroska->num_indexes++;
1766                 }
1767                 break;
1768
1769             default:
1770                 av_log(matroska->ctx, AV_LOG_INFO,
1771                        "Unknown entry 0x%x in cues header\n", id);
1772                 /* fall-through */
1773
1774             case EBML_ID_VOID:
1775                 res = ebml_read_skip(matroska);
1776                 break;
1777         }
1778
1779         if (matroska->level_up) {
1780             matroska->level_up--;
1781             break;
1782         }
1783     }
1784
1785     return res;
1786 }
1787
1788 static int
1789 matroska_parse_metadata (MatroskaDemuxContext *matroska)
1790 {
1791     int res = 0;
1792     uint32_t id;
1793
1794     while (res == 0) {
1795         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1796             res = AVERROR_IO;
1797             break;
1798         } else if (matroska->level_up) {
1799             matroska->level_up--;
1800             break;
1801         }
1802
1803         switch (id) {
1804             /* Hm, this is unsupported... */
1805             default:
1806                 av_log(matroska->ctx, AV_LOG_INFO,
1807                        "Unknown entry 0x%x in metadata header\n", id);
1808                 /* fall-through */
1809
1810             case EBML_ID_VOID:
1811                 res = ebml_read_skip(matroska);
1812                 break;
1813         }
1814
1815         if (matroska->level_up) {
1816             matroska->level_up--;
1817             break;
1818         }
1819     }
1820
1821     return res;
1822 }
1823
1824 static int
1825 matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1826 {
1827     int res = 0;
1828     uint32_t id;
1829
1830     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1831
1832     while (res == 0) {
1833         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1834             res = AVERROR_IO;
1835             break;
1836         } else if (matroska->level_up) {
1837             matroska->level_up--;
1838             break;
1839         }
1840
1841         switch (id) {
1842             case MATROSKA_ID_SEEKENTRY: {
1843                 uint32_t seek_id = 0, peek_id_cache = 0;
1844                 uint64_t seek_pos = (uint64_t) -1, t;
1845
1846                 if ((res = ebml_read_master(matroska, &id)) < 0)
1847                     break;
1848
1849                 while (res == 0) {
1850                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1851                         res = AVERROR_IO;
1852                         break;
1853                     } else if (matroska->level_up) {
1854                         matroska->level_up--;
1855                         break;
1856                     }
1857
1858                     switch (id) {
1859                         case MATROSKA_ID_SEEKID:
1860                             res = ebml_read_uint(matroska, &id, &t);
1861                             seek_id = t;
1862                             break;
1863
1864                         case MATROSKA_ID_SEEKPOSITION:
1865                             res = ebml_read_uint(matroska, &id, &seek_pos);
1866                             break;
1867
1868                         default:
1869                             av_log(matroska->ctx, AV_LOG_INFO,
1870                                    "Unknown seekhead ID 0x%x\n", id);
1871                             /* fall-through */
1872
1873                         case EBML_ID_VOID:
1874                             res = ebml_read_skip(matroska);
1875                             break;
1876                     }
1877
1878                     if (matroska->level_up) {
1879                         matroska->level_up--;
1880                         break;
1881                     }
1882                 }
1883
1884                 if (!seek_id || seek_pos == (uint64_t) -1) {
1885                     av_log(matroska->ctx, AV_LOG_INFO,
1886                            "Incomplete seekhead entry (0x%x/%"PRIu64")\n",
1887                            seek_id, seek_pos);
1888                     break;
1889                 }
1890
1891                 switch (seek_id) {
1892                     case MATROSKA_ID_CUES:
1893                     case MATROSKA_ID_TAGS: {
1894                         uint32_t level_up = matroska->level_up;
1895                         offset_t before_pos;
1896                         uint64_t length;
1897                         MatroskaLevel level;
1898
1899                         /* remember the peeked ID and the current position */
1900                         peek_id_cache = matroska->peek_id;
1901                         before_pos = url_ftell(&matroska->ctx->pb);
1902
1903                         /* seek */
1904                         if ((res = ebml_read_seek(matroska, seek_pos +
1905                                                matroska->segment_start)) < 0)
1906                             return res;
1907
1908                         /* we don't want to lose our seekhead level, so we add
1909                          * a dummy. This is a crude hack. */
1910                         if (matroska->num_levels == EBML_MAX_DEPTH) {
1911                             av_log(matroska->ctx, AV_LOG_INFO,
1912                                    "Max EBML element depth (%d) reached, "
1913                                    "cannot parse further.\n", EBML_MAX_DEPTH);
1914                             return AVERROR_UNKNOWN;
1915                         }
1916
1917                         level.start = 0;
1918                         level.length = (uint64_t)-1;
1919                         matroska->levels[matroska->num_levels] = level;
1920                         matroska->num_levels++;
1921
1922                         /* check ID */
1923                         if (!(id = ebml_peek_id (matroska,
1924                                                  &matroska->level_up)))
1925                             break;
1926                         if (id != seek_id) {
1927                             av_log(matroska->ctx, AV_LOG_INFO,
1928                                    "We looked for ID=0x%x but got "
1929                                    "ID=0x%x (pos=%"PRIu64")",
1930                                    seek_id, id, seek_pos +
1931                                    matroska->segment_start);
1932                             goto finish;
1933                         }
1934
1935                         /* read master + parse */
1936                         if ((res = ebml_read_master(matroska, &id)) < 0)
1937                             break;
1938                         switch (id) {
1939                             case MATROSKA_ID_CUES:
1940                                 if (!(res = matroska_parse_index(matroska)) ||
1941                                     url_feof(&matroska->ctx->pb)) {
1942                                     matroska->index_parsed = 1;
1943                                     res = 0;
1944                                 }
1945                                 break;
1946                             case MATROSKA_ID_TAGS:
1947                                 if (!(res = matroska_parse_metadata(matroska)) ||
1948                                    url_feof(&matroska->ctx->pb)) {
1949                                     matroska->metadata_parsed = 1;
1950                                     res = 0;
1951                                 }
1952                                 break;
1953                         }
1954                         if (res < 0)
1955                             break;
1956
1957                     finish:
1958                         /* remove dummy level */
1959                         while (matroska->num_levels) {
1960                             matroska->num_levels--;
1961                             length =
1962                                 matroska->levels[matroska->num_levels].length;
1963                             if (length == (uint64_t)-1)
1964                                 break;
1965                         }
1966
1967                         /* seek back */
1968                         if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1969                             return res;
1970                         matroska->peek_id = peek_id_cache;
1971                         matroska->level_up = level_up;
1972                         break;
1973                     }
1974
1975                     default:
1976                         av_log(matroska->ctx, AV_LOG_INFO,
1977                                "Ignoring seekhead entry for ID=0x%x\n",
1978                                seek_id);
1979                         break;
1980                 }
1981
1982                 break;
1983             }
1984
1985             default:
1986                 av_log(matroska->ctx, AV_LOG_INFO,
1987                        "Unknown seekhead ID 0x%x\n", id);
1988                 /* fall-through */
1989
1990             case EBML_ID_VOID:
1991                 res = ebml_read_skip(matroska);
1992                 break;
1993         }
1994
1995         if (matroska->level_up) {
1996             matroska->level_up--;
1997             break;
1998         }
1999     }
2000
2001     return res;
2002 }
2003
2004 static int
2005 matroska_read_header (AVFormatContext    *s,
2006                       AVFormatParameters *ap)
2007 {
2008     MatroskaDemuxContext *matroska = s->priv_data;
2009     char *doctype;
2010     int version, last_level, res = 0;
2011     uint32_t id;
2012
2013     matroska->ctx = s;
2014
2015     /* First read the EBML header. */
2016     doctype = NULL;
2017     if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2018         return res;
2019     if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2020         av_log(matroska->ctx, AV_LOG_ERROR,
2021                "Wrong EBML doctype ('%s' != 'matroska').\n",
2022                doctype ? doctype : "(none)");
2023         if (doctype)
2024             av_free(doctype);
2025         return AVERROR_NOFMT;
2026     }
2027     av_free(doctype);
2028     if (version != 1) {
2029         av_log(matroska->ctx, AV_LOG_ERROR,
2030                "Matroska demuxer version 1 too old for file version %d\n",
2031                version);
2032         return AVERROR_NOFMT;
2033     }
2034
2035     /* The next thing is a segment. */
2036     while (1) {
2037         if (!(id = ebml_peek_id(matroska, &last_level)))
2038             return AVERROR_IO;
2039         if (id == MATROSKA_ID_SEGMENT)
2040             break;
2041
2042         /* oi! */
2043         av_log(matroska->ctx, AV_LOG_INFO,
2044                "Expected a Segment ID (0x%x), but received 0x%x!\n",
2045                MATROSKA_ID_SEGMENT, id);
2046         if ((res = ebml_read_skip(matroska)) < 0)
2047             return res;
2048     }
2049
2050     /* We now have a Matroska segment.
2051      * Seeks are from the beginning of the segment,
2052      * after the segment ID/length. */
2053     if ((res = ebml_read_master(matroska, &id)) < 0)
2054         return res;
2055     matroska->segment_start = url_ftell(&s->pb);
2056
2057     matroska->time_scale = 1000000;
2058     /* we've found our segment, start reading the different contents in here */
2059     while (res == 0) {
2060         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2061             res = AVERROR_IO;
2062             break;
2063         } else if (matroska->level_up) {
2064             matroska->level_up--;
2065             break;
2066         }
2067
2068         switch (id) {
2069             /* stream info */
2070             case MATROSKA_ID_INFO: {
2071                 if ((res = ebml_read_master(matroska, &id)) < 0)
2072                     break;
2073                 res = matroska_parse_info(matroska);
2074                 break;
2075             }
2076
2077             /* track info headers */
2078             case MATROSKA_ID_TRACKS: {
2079                 if ((res = ebml_read_master(matroska, &id)) < 0)
2080                     break;
2081                 res = matroska_parse_tracks(matroska);
2082                 break;
2083             }
2084
2085             /* stream index */
2086             case MATROSKA_ID_CUES: {
2087                 if (!matroska->index_parsed) {
2088                     if ((res = ebml_read_master(matroska, &id)) < 0)
2089                         break;
2090                     res = matroska_parse_index(matroska);
2091                 } else
2092                     res = ebml_read_skip(matroska);
2093                 break;
2094             }
2095
2096             /* metadata */
2097             case MATROSKA_ID_TAGS: {
2098                 if (!matroska->metadata_parsed) {
2099                     if ((res = ebml_read_master(matroska, &id)) < 0)
2100                         break;
2101                     res = matroska_parse_metadata(matroska);
2102                 } else
2103                     res = ebml_read_skip(matroska);
2104                 break;
2105             }
2106
2107             /* file index (if seekable, seek to Cues/Tags to parse it) */
2108             case MATROSKA_ID_SEEKHEAD: {
2109                 if ((res = ebml_read_master(matroska, &id)) < 0)
2110                     break;
2111                 res = matroska_parse_seekhead(matroska);
2112                 break;
2113             }
2114
2115             case MATROSKA_ID_CLUSTER: {
2116                 /* Do not read the master - this will be done in the next
2117                  * call to matroska_read_packet. */
2118                 res = 1;
2119                 break;
2120             }
2121
2122             default:
2123                 av_log(matroska->ctx, AV_LOG_INFO,
2124                        "Unknown matroska file header ID 0x%x\n", id);
2125             /* fall-through */
2126
2127             case EBML_ID_VOID:
2128                 res = ebml_read_skip(matroska);
2129                 break;
2130         }
2131
2132         if (matroska->level_up) {
2133             matroska->level_up--;
2134             break;
2135         }
2136     }
2137
2138     if (res < 0)
2139         return res;
2140
2141     /* Have we found a cluster? */
2142     if (res == 1) {
2143         int i, j;
2144         enum CodecID codec_id= CODEC_ID_NONE;
2145         MatroskaTrack *track;
2146         AVStream *st;
2147
2148         for (i = 0; i < matroska->num_tracks; i++) {
2149             void *extradata = NULL;
2150             int extradata_size = 0;
2151             track = matroska->tracks[i];
2152
2153             /* libavformat does not really support subtitles.
2154              * Also apply some sanity checks. */
2155             if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
2156                 (track->codec_id == NULL))
2157                 continue;
2158
2159             for(j=0; codec_tags[j].str; j++){
2160                 if(!strcmp(codec_tags[j].str, track->codec_id)){
2161                     codec_id= codec_tags[j].id;
2162                     break;
2163                 }
2164             }
2165
2166             /* Set the FourCC from the CodecID. */
2167             /* This is the MS compatibility mode which stores a
2168              * BITMAPINFOHEADER in the CodecPrivate. */
2169             if (!strcmp(track->codec_id,
2170                         MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2171                 (track->codec_priv_size >= 40) &&
2172                 (track->codec_priv != NULL)) {
2173                 unsigned char *p;
2174
2175                 /* Offset of biCompression. Stored in LE. */
2176                 p = (unsigned char *)track->codec_priv + 16;
2177                 ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
2178                                  (p[2] << 16) | (p[1] << 8) | p[0];
2179                 codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
2180
2181             }
2182
2183             /* This is the MS compatibility mode which stores a
2184              * WAVEFORMATEX in the CodecPrivate. */
2185             else if (!strcmp(track->codec_id,
2186                              MATROSKA_CODEC_ID_AUDIO_ACM) &&
2187                 (track->codec_priv_size >= 18) &&
2188                 (track->codec_priv != NULL)) {
2189                 unsigned char *p;
2190                 uint16_t tag;
2191
2192                 /* Offset of wFormatTag. Stored in LE. */
2193                 p = (unsigned char *)track->codec_priv;
2194                 tag = (p[1] << 8) | p[0];
2195                 codec_id = codec_get_wav_id(tag);
2196
2197             }
2198
2199             if (codec_id == CODEC_ID_NONE) {
2200                 av_log(matroska->ctx, AV_LOG_INFO,
2201                        "Unknown/unsupported CodecID %s.\n",
2202                        track->codec_id);
2203             }
2204
2205             track->stream_index = matroska->num_streams;
2206
2207             matroska->num_streams++;
2208             st = av_new_stream(s, track->stream_index);
2209             if (st == NULL)
2210                 return AVERROR_NOMEM;
2211             av_set_pts_info(st, 64, matroska->time_scale, 1000*1000*1000); /* 64 bit pts in ns */
2212
2213             st->codec->codec_id = codec_id;
2214
2215             if(extradata){
2216                 st->codec->extradata = extradata;
2217                 st->codec->extradata_size = extradata_size;
2218             } else if(track->codec_priv && track->codec_priv_size > 0){
2219                 st->codec->extradata = av_malloc(track->codec_priv_size);
2220                 if(st->codec->extradata == NULL)
2221                     return AVERROR_NOMEM;
2222                 st->codec->extradata_size = track->codec_priv_size;
2223                 memcpy(st->codec->extradata, track->codec_priv,
2224                        track->codec_priv_size);
2225             }
2226
2227             if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2228                 MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2229
2230                 st->codec->codec_type = CODEC_TYPE_VIDEO;
2231                 st->codec->codec_tag = videotrack->fourcc;
2232                 st->codec->width = videotrack->pixel_width;
2233                 st->codec->height = videotrack->pixel_height;
2234                 if (videotrack->display_width == 0)
2235                     videotrack->display_width= videotrack->pixel_width;
2236                 if (videotrack->display_height == 0)
2237                     videotrack->display_height= videotrack->pixel_height;
2238                 av_reduce(&st->codec->sample_aspect_ratio.num,
2239                           &st->codec->sample_aspect_ratio.den,
2240                           st->codec->height * videotrack->display_width,
2241                           st->codec-> width * videotrack->display_height,
2242                           255);
2243             } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2244                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2245
2246                 st->codec->codec_type = CODEC_TYPE_AUDIO;
2247                 st->codec->sample_rate = audiotrack->samplerate;
2248                 st->codec->channels = audiotrack->channels;
2249             } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
2250                 st->codec->codec_type = CODEC_TYPE_SUBTITLE;
2251             }
2252
2253             /* What do we do with private data? E.g. for Vorbis. */
2254         }
2255     }
2256
2257     return 0;
2258 }
2259
2260 static int
2261 matroska_find_track_by_num (MatroskaDemuxContext *matroska,
2262                             int                   num)
2263 {
2264     int i;
2265
2266     for (i = 0; i < matroska->num_tracks; i++)
2267         if (matroska->tracks[i]->num == num)
2268             return i;
2269
2270     return -1;
2271 }
2272
2273 static int
2274 matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2275                            uint64_t              cluster_time)
2276 {
2277     int res = 0;
2278     uint32_t id;
2279     AVPacket *pkt;
2280     int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2281
2282     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2283
2284     while (res == 0) {
2285         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2286             res = AVERROR_IO;
2287             break;
2288         } else if (matroska->level_up) {
2289             matroska->level_up--;
2290             break;
2291         }
2292
2293         switch (id) {
2294             /* one block inside the group. Note, block parsing is one
2295              * of the harder things, so this code is a bit complicated.
2296              * See http://www.matroska.org/ for documentation. */
2297             case MATROSKA_ID_BLOCK: {
2298                 uint8_t *data, *origdata;
2299                 int size;
2300                 int16_t block_time;
2301                 uint32_t *lace_size = NULL;
2302                 int n, track, flags, laces = 0;
2303                 uint64_t num;
2304                 int64_t pos= url_ftell(&matroska->ctx->pb);
2305
2306                 if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
2307                     break;
2308                 origdata = data;
2309
2310                 /* first byte(s): blocknum */
2311                 if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2312                     av_log(matroska->ctx, AV_LOG_ERROR,
2313                            "EBML block data error\n");
2314                     av_free(origdata);
2315                     break;
2316                 }
2317                 data += n;
2318                 size -= n;
2319
2320                 /* fetch track from num */
2321                 track = matroska_find_track_by_num(matroska, num);
2322                 if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2323                     av_log(matroska->ctx, AV_LOG_INFO,
2324                            "Invalid stream %d or size %u\n", track, size);
2325                     av_free(origdata);
2326                     break;
2327                 }
2328                 if(matroska->ctx->streams[ matroska->tracks[track]->stream_index ]->discard >= AVDISCARD_ALL){
2329                     av_free(origdata);
2330                     break;
2331                 }
2332
2333                 /* block_time (relative to cluster time) */
2334                 block_time = (data[0] << 8) | data[1];
2335                 data += 2;
2336                 size -= 2;
2337                 flags = *data;
2338                 data += 1;
2339                 size -= 1;
2340                 switch ((flags & 0x06) >> 1) {
2341                     case 0x0: /* no lacing */
2342                         laces = 1;
2343                         lace_size = av_mallocz(sizeof(int));
2344                         lace_size[0] = size;
2345                         break;
2346
2347                     case 0x1: /* xiph lacing */
2348                     case 0x2: /* fixed-size lacing */
2349                     case 0x3: /* EBML lacing */
2350                         if (size == 0) {
2351                             res = -1;
2352                             break;
2353                         }
2354                         laces = (*data) + 1;
2355                         data += 1;
2356                         size -= 1;
2357                         lace_size = av_mallocz(laces * sizeof(int));
2358
2359                         switch ((flags & 0x06) >> 1) {
2360                             case 0x1: /* xiph lacing */ {
2361                                 uint8_t temp;
2362                                 uint32_t total = 0;
2363                                 for (n = 0; res == 0 && n < laces - 1; n++) {
2364                                     while (1) {
2365                                         if (size == 0) {
2366                                             res = -1;
2367                                             break;
2368                                         }
2369                                         temp = *data;
2370                                         lace_size[n] += temp;
2371                                         data += 1;
2372                                         size -= 1;
2373                                         if (temp != 0xff)
2374                                             break;
2375                                     }
2376                                     total += lace_size[n];
2377                                 }
2378                                 lace_size[n] = size - total;
2379                                 break;
2380                             }
2381
2382                             case 0x2: /* fixed-size lacing */
2383                                 for (n = 0; n < laces; n++)
2384                                     lace_size[n] = size / laces;
2385                                 break;
2386
2387                             case 0x3: /* EBML lacing */ {
2388                                 uint32_t total;
2389                                 n = matroska_ebmlnum_uint(data, size, &num);
2390                                 if (n < 0) {
2391                                     av_log(matroska->ctx, AV_LOG_INFO,
2392                                            "EBML block data error\n");
2393                                     break;
2394                                 }
2395                                 data += n;
2396                                 size -= n;
2397                                 total = lace_size[0] = num;
2398                                 for (n = 1; res == 0 && n < laces - 1; n++) {
2399                                     int64_t snum;
2400                                     int r;
2401                                     r = matroska_ebmlnum_sint (data, size,
2402                                                                &snum);
2403                                     if (r < 0) {
2404                                         av_log(matroska->ctx, AV_LOG_INFO,
2405                                                "EBML block data error\n");
2406                                         break;
2407                                     }
2408                                     data += r;
2409                                     size -= r;
2410                                     lace_size[n] = lace_size[n - 1] + snum;
2411                                     total += lace_size[n];
2412                                 }
2413                                 lace_size[n] = size - total;
2414                                 break;
2415                             }
2416                         }
2417                         break;
2418                 }
2419
2420                 if (res == 0) {
2421                     for (n = 0; n < laces; n++) {
2422                         uint64_t timecode = AV_NOPTS_VALUE;
2423
2424                         pkt = av_mallocz(sizeof(AVPacket));
2425                         /* XXX: prevent data copy... */
2426                         if (av_new_packet(pkt,lace_size[n]) < 0) {
2427                             res = AVERROR_NOMEM;
2428                             break;
2429                         }
2430                         if (cluster_time != (uint64_t)-1 && n == 0) {
2431                             if (cluster_time + block_time >= 0)
2432                                 timecode = cluster_time + block_time;
2433                         }
2434                         /* FIXME: duration */
2435
2436                         memcpy(pkt->data, data, lace_size[n]);
2437                         data += lace_size[n];
2438                         if (n == 0)
2439                             pkt->flags = is_keyframe;
2440                         pkt->stream_index =
2441                             matroska->tracks[track]->stream_index;
2442
2443                         pkt->pts = timecode;
2444                         pkt->pos= pos;
2445
2446                         matroska_queue_packet(matroska, pkt);
2447                     }
2448                 }
2449
2450                 av_free(lace_size);
2451                 av_free(origdata);
2452                 break;
2453             }
2454
2455             case MATROSKA_ID_BLOCKDURATION: {
2456                 uint64_t num;
2457                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2458                     break;
2459                 av_log(matroska->ctx, AV_LOG_INFO,
2460                        "FIXME: implement support for BlockDuration\n");
2461                 break;
2462             }
2463
2464             case MATROSKA_ID_BLOCKREFERENCE:
2465                 /* We've found a reference, so not even the first frame in
2466                  * the lace is a key frame. */
2467                 is_keyframe = 0;
2468                 if (last_num_packets != matroska->num_packets)
2469                     matroska->packets[last_num_packets]->flags = 0;
2470                 res = ebml_read_skip(matroska);
2471                 break;
2472
2473             default:
2474                 av_log(matroska->ctx, AV_LOG_INFO,
2475                        "Unknown entry 0x%x in blockgroup data\n", id);
2476                 /* fall-through */
2477
2478             case EBML_ID_VOID:
2479                 res = ebml_read_skip(matroska);
2480                 break;
2481         }
2482
2483         if (matroska->level_up) {
2484             matroska->level_up--;
2485             break;
2486         }
2487     }
2488
2489     return res;
2490 }
2491
2492 static int
2493 matroska_parse_cluster (MatroskaDemuxContext *matroska)
2494 {
2495     int res = 0;
2496     uint32_t id;
2497     uint64_t cluster_time = 0;
2498
2499     av_log(matroska->ctx, AV_LOG_DEBUG,
2500            "parsing cluster at %"PRId64"\n", url_ftell(&matroska->ctx->pb));
2501
2502     while (res == 0) {
2503         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2504             res = AVERROR_IO;
2505             break;
2506         } else if (matroska->level_up) {
2507             matroska->level_up--;
2508             break;
2509         }
2510
2511         switch (id) {
2512             /* cluster timecode */
2513             case MATROSKA_ID_CLUSTERTIMECODE: {
2514                 uint64_t num;
2515                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2516                     break;
2517                 cluster_time = num;
2518                 break;
2519             }
2520
2521                 /* a group of blocks inside a cluster */
2522             case MATROSKA_ID_BLOCKGROUP:
2523                 if ((res = ebml_read_master(matroska, &id)) < 0)
2524                     break;
2525                 res = matroska_parse_blockgroup(matroska, cluster_time);
2526                 break;
2527
2528             default:
2529                 av_log(matroska->ctx, AV_LOG_INFO,
2530                        "Unknown entry 0x%x in cluster data\n", id);
2531                 /* fall-through */
2532
2533             case EBML_ID_VOID:
2534                 res = ebml_read_skip(matroska);
2535                 break;
2536         }
2537
2538         if (matroska->level_up) {
2539             matroska->level_up--;
2540             break;
2541         }
2542     }
2543
2544     return res;
2545 }
2546
2547 static int
2548 matroska_read_packet (AVFormatContext *s,
2549                       AVPacket        *pkt)
2550 {
2551     MatroskaDemuxContext *matroska = s->priv_data;
2552     int res = 0;
2553     uint32_t id;
2554
2555     /* Do we still have a packet queued? */
2556     if (matroska_deliver_packet(matroska, pkt) == 0)
2557         return 0;
2558
2559     /* Have we already reached the end? */
2560     if (matroska->done)
2561         return AVERROR_IO;
2562
2563     while (res == 0) {
2564         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2565             res = AVERROR_IO;
2566             break;
2567         } else if (matroska->level_up) {
2568             matroska->level_up--;
2569             break;
2570         }
2571
2572         switch (id) {
2573             case MATROSKA_ID_CLUSTER:
2574                 if ((res = ebml_read_master(matroska, &id)) < 0)
2575                     break;
2576                 if ((res = matroska_parse_cluster(matroska)) == 0)
2577                     res = 1; /* Parsed one cluster, let's get out. */
2578                 break;
2579
2580             default:
2581             case EBML_ID_VOID:
2582                 res = ebml_read_skip(matroska);
2583                 break;
2584         }
2585
2586         if (matroska->level_up) {
2587             matroska->level_up--;
2588             break;
2589         }
2590     }
2591
2592     if (res == -1)
2593         matroska->done = 1;
2594
2595     return matroska_deliver_packet(matroska, pkt);
2596 }
2597
2598 static int
2599 matroska_read_close (AVFormatContext *s)
2600 {
2601     MatroskaDemuxContext *matroska = s->priv_data;
2602     int n = 0;
2603
2604     if (matroska->writing_app)
2605         av_free(matroska->writing_app);
2606     if (matroska->muxing_app)
2607         av_free(matroska->muxing_app);
2608     if (matroska->index)
2609         av_free(matroska->index);
2610
2611     if (matroska->packets != NULL) {
2612         for (n = 0; n < matroska->num_packets; n++) {
2613             av_free_packet(matroska->packets[n]);
2614             av_free(matroska->packets[n]);
2615         }
2616         av_free(matroska->packets);
2617     }
2618
2619     for (n = 0; n < matroska->num_tracks; n++) {
2620         MatroskaTrack *track = matroska->tracks[n];
2621         if (track->codec_id)
2622             av_free(track->codec_id);
2623         if (track->codec_name)
2624             av_free(track->codec_name);
2625         if (track->codec_priv)
2626             av_free(track->codec_priv);
2627         if (track->name)
2628             av_free(track->name);
2629         if (track->language)
2630             av_free(track->language);
2631
2632         av_free(track);
2633     }
2634
2635     memset(matroska, 0, sizeof(MatroskaDemuxContext));
2636
2637     return 0;
2638 }
2639
2640 AVInputFormat matroska_demuxer = {
2641     "matroska",
2642     "Matroska file format",
2643     sizeof(MatroskaDemuxContext),
2644     matroska_probe,
2645     matroska_read_header,
2646     matroska_read_packet,
2647     matroska_read_close,
2648 };