]> git.sesse.net Git - ffmpeg/blob - libavformat/matroska.c
Patch from Gianluigi Tiesi (sherpya at netfarm dot it):
[ffmpeg] / libavformat / matroska.c
1 /*
2  * Matroska file demuxer (no muxer yet)
3  * Copyright (c) 2003-2004 The ffmpeg Project
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 /**
21  * @file matroska.c
22  * Matroska file demuxer
23  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
24  * with a little help from Moritz Bunkus <moritz@bunkus.org>
25  * Specs available on the matroska project page:
26  * http://www.matroska.org/.
27  */
28
29 #include "avformat.h"
30 /* For codec_get_bmp_id and codec_get_wav_id. */
31 #include "avi.h"
32
/* Highest EBML version this demuxer accepts when reading a header. */
#define EBML_VERSION                        1

/* ID of the top-level EBML header master element. */
#define EBML_ID_HEADER                      0x1A45DFA3

/* IDs of the elements found inside the EBML header master. */
#define EBML_ID_EBMLVERSION                 0x4286
#define EBML_ID_EBMLREADVERSION             0x42F7
#define EBML_ID_EBMLMAXIDLENGTH             0x42F2
#define EBML_ID_EBMLMAXSIZELENGTH           0x42F3
#define EBML_ID_DOCTYPE                     0x4282
#define EBML_ID_DOCTYPEVERSION              0x4287
#define EBML_ID_DOCTYPEREADVERSION          0x4285

/* General-purpose EBML element IDs. */
#define EBML_ID_VOID                        0xEC
50
/*
 * Matroska element IDs. None of them is wider than 32 bits.
 */

/* The top-level segment element. */
#define MATROSKA_ID_SEGMENT                 0x18538067

/* Master elements directly below the segment. */
#define MATROSKA_ID_INFO                    0x1549A966
#define MATROSKA_ID_TRACKS                  0x1654AE6B
#define MATROSKA_ID_CUES                    0x1C53BB6B
#define MATROSKA_ID_TAGS                    0x1254C367
#define MATROSKA_ID_SEEKHEAD                0x114D9B74
#define MATROSKA_ID_CLUSTER                 0x1F43B675

/* Children of the info master. */
#define MATROSKA_ID_TIMECODESCALE           0x2AD7B1
#define MATROSKA_ID_DURATION                0x4489
#define MATROSKA_ID_WRITINGAPP              0x5741
#define MATROSKA_ID_MUXINGAPP               0x4D80
#define MATROSKA_ID_DATEUTC                 0x4461

/* Child of the tracks master. */
#define MATROSKA_ID_TRACKENTRY              0xAE

/* Children of the trackentry master. */
#define MATROSKA_ID_TRACKNUMBER             0xD7
#define MATROSKA_ID_TRACKUID                0x73C5
#define MATROSKA_ID_TRACKTYPE               0x83
#define MATROSKA_ID_TRACKAUDIO              0xE1
#define MATROSKA_ID_TRACKVIDEO              0xE0
#define MATROSKA_ID_CODECID                 0x86
#define MATROSKA_ID_CODECPRIVATE            0x63A2
#define MATROSKA_ID_CODECNAME               0x258688
#define MATROSKA_ID_CODECINFOURL            0x3B4040
#define MATROSKA_ID_CODECDOWNLOADURL        0x26B240
#define MATROSKA_ID_TRACKNAME               0x536E
#define MATROSKA_ID_TRACKLANGUAGE           0x22B59C
#define MATROSKA_ID_TRACKFLAGENABLED        0xB9
#define MATROSKA_ID_TRACKFLAGDEFAULT        0x88
#define MATROSKA_ID_TRACKFLAGLACING         0x9C
#define MATROSKA_ID_TRACKMINCACHE           0x6DE7
#define MATROSKA_ID_TRACKMAXCACHE           0x6DF8
#define MATROSKA_ID_TRACKDEFAULTDURATION    0x23E383

/* Children of the trackvideo master. */
#define MATROSKA_ID_VIDEOFRAMERATE          0x2383E3
#define MATROSKA_ID_VIDEODISPLAYWIDTH       0x54B0
#define MATROSKA_ID_VIDEODISPLAYHEIGHT      0x54BA
#define MATROSKA_ID_VIDEOPIXELWIDTH         0xB0
#define MATROSKA_ID_VIDEOPIXELHEIGHT        0xBA
#define MATROSKA_ID_VIDEOFLAGINTERLACED     0x9A
#define MATROSKA_ID_VIDEOSTEREOMODE         0x53B9
#define MATROSKA_ID_VIDEOASPECTRATIO        0x54B3
#define MATROSKA_ID_VIDEOCOLOURSPACE        0x2EB524

/* Children of the trackaudio master. */
#define MATROSKA_ID_AUDIOSAMPLINGFREQ       0xB5
#define MATROSKA_ID_AUDIOBITDEPTH           0x6264
#define MATROSKA_ID_AUDIOCHANNELS           0x9F

/* Child of the cues master. */
#define MATROSKA_ID_POINTENTRY              0xBB

/* Children of the pointentry master. */
#define MATROSKA_ID_CUETIME                 0xB3
#define MATROSKA_ID_CUETRACKPOSITION        0xB7

/* Children of the cuetrackposition master. */
#define MATROSKA_ID_CUETRACK                0xF7
#define MATROSKA_ID_CUECLUSTERPOSITION      0xF1

/* Children of the tags master. */
/* TODO */

/* Child of the seekhead master. */
#define MATROSKA_ID_SEEKENTRY               0x4DBB

/* Children of the seekpoint master. */
#define MATROSKA_ID_SEEKID                  0x53AB
#define MATROSKA_ID_SEEKPOSITION            0x53AC

/* Children of the cluster master. */
#define MATROSKA_ID_CLUSTERTIMECODE         0xE7
#define MATROSKA_ID_BLOCKGROUP              0xA0

/* Children of the blockgroup master. */
#define MATROSKA_ID_BLOCK                   0xA1
#define MATROSKA_ID_BLOCKDURATION           0x9B
#define MATROSKA_ID_BLOCKREFERENCE          0xFB
141
/* Numeric track kinds used for MatroskaTrack.type. */
typedef enum {
    MATROSKA_TRACK_TYPE_VIDEO    = 0x01,
    MATROSKA_TRACK_TYPE_AUDIO    = 0x02,
    MATROSKA_TRACK_TYPE_COMPLEX  = 0x03,
    MATROSKA_TRACK_TYPE_LOGO     = 0x10,
    MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
    MATROSKA_TRACK_TYPE_CONTROL  = 0x20
} MatroskaTrackType;
150
/* Eye (stereo) modes of a video track. */
typedef enum {
    MATROSKA_EYE_MODE_MONO  = 0,
    MATROSKA_EYE_MODE_RIGHT = 1,
    MATROSKA_EYE_MODE_LEFT  = 2,
    MATROSKA_EYE_MODE_BOTH  = 3
} MatroskaEyeMode;
157
/* Aspect-ratio handling modes of a video track. */
typedef enum {
    MATROSKA_ASPECT_RATIO_MODE_FREE  = 0,
    MATROSKA_ASPECT_RATIO_MODE_KEEP  = 1,
    MATROSKA_ASPECT_RATIO_MODE_FIXED = 2
} MatroskaAspectRatioMode;
163
/*
 * The flag sets below are not defined by the Matroska format itself;
 * they are bookkeeping used internally by the muxer/demuxer.
 */

/*
 * Flags common to every track. MATROSKA_TRACK_SHIFT is not a flag of
 * its own: it is the base bit from which type-specific flag sets
 * (e.g. MatroskaVideoTrackFlags) are derived.
 */
typedef enum {
    MATROSKA_TRACK_ENABLED = 1 << 0,
    MATROSKA_TRACK_DEFAULT = 1 << 1,
    MATROSKA_TRACK_LACING  = 1 << 2,
    MATROSKA_TRACK_SHIFT   = 1 << 16
} MatroskaTrackFlags;
175
176 typedef enum {
177   MATROSKA_VIDEOTRACK_INTERLACED = (MATROSKA_TRACK_SHIFT<<0)
178 } MatroskaVideoTrackFlags;
179
/*
 * Matroska codec IDs. These are strings, not numeric IDs.
 */

/* Video codecs. */
#define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC   "V_MS/VFW/FOURCC"
#define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP     "V_MPEG4/ISO/SP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP    "V_MPEG4/ISO/ASP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP     "V_MPEG4/ISO/AP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC    "V_MPEG4/ISO/AVC"
#define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3    "V_MPEG4/MS/V3"
#define MATROSKA_CODEC_ID_VIDEO_MPEG1        "V_MPEG1"
#define MATROSKA_CODEC_ID_VIDEO_MPEG2        "V_MPEG2"
#define MATROSKA_CODEC_ID_VIDEO_MJPEG        "V_MJPEG"
/* TODO: Real/Quicktime */

/* Audio codecs. (A_MS/ACM used to be defined twice; once is enough.) */
#define MATROSKA_CODEC_ID_AUDIO_ACM          "A_MS/ACM"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1     "A_MPEG/L1"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2     "A_MPEG/L2"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3     "A_MPEG/L3"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE   "A_PCM/INT/BIG"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE   "A_PCM/INT/LIT"
#define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT    "A_PCM/FLOAT/IEEE"
#define MATROSKA_CODEC_ID_AUDIO_AC3          "A_AC3"
#define MATROSKA_CODEC_ID_AUDIO_DTS          "A_DTS"
#define MATROSKA_CODEC_ID_AUDIO_VORBIS       "A_VORBIS"
/* NOTE(review): the two AAC strings end in '/', so they look like
 * prefixes rather than complete IDs - confirm against the code that
 * matches them. */
#define MATROSKA_CODEC_ID_AUDIO_MPEG2        "A_AAC/MPEG2/"
#define MATROSKA_CODEC_ID_AUDIO_MPEG4        "A_AAC/MPEG4/"
/* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */

/* Maximum nesting depth of master elements tracked by the EBML parser. */
#define EBML_MAX_DEPTH 16
213
214 typedef struct Track {
215     MatroskaTrackType type;
216
217     /* Unique track number and track ID. stream_index is the index that
218      * the calling app uses for this track. */
219     uint32_t num,
220         uid,
221         stream_index;
222
223     char *name,
224         *language;
225
226     char *codec_id,
227         *codec_name;
228
229     unsigned char *codec_priv;
230     int codec_priv_size;
231
232     int64_t default_duration;
233     MatroskaTrackFlags flags;
234 } MatroskaTrack;
235
236 typedef struct MatroskaVideoTrack {
237     MatroskaTrack track;
238
239     int pixel_width,
240         pixel_height,
241         display_width,
242         display_height;
243
244     uint32_t fourcc;
245
246     MatroskaAspectRatioMode ar_mode;
247     MatroskaEyeMode eye_mode;
248
249     //..
250 } MatroskaVideoTrack;
251
252 typedef struct MatroskaAudioTrack {
253     MatroskaTrack track;
254
255     int channels,
256         bitdepth,
257         samplerate;
258     //..
259 } MatroskaAudioTrack;
260
261 typedef struct MatroskaSubtitleTrack {
262     MatroskaTrack track;
263
264     //..
265 } MatroskaSubtitleTrack;
266
/* One open master element: where its payload starts in the stream and
 * how long that payload is. */
typedef struct MatroskaLevel {
    uint64_t start;
    uint64_t length;
} MatroskaLevel;
270
/* One entry of the seek index. */
typedef struct MatroskaDemuxIndex {
    /* position of the corresponding *cluster*! */
    uint64_t pos;
    /* reference to the track's 'num' */
    uint16_t track;
    /* in nanoseconds */
    uint64_t time;
} MatroskaDemuxIndex;
276
277 typedef struct MatroskaDemuxContext {
278     AVFormatContext *ctx;
279
280     /* ebml stuff */
281     int num_levels;
282     MatroskaLevel levels[EBML_MAX_DEPTH];
283     int level_up;
284
285     /* matroska stuff */
286     char *writing_app,
287         *muxing_app;
288     int64_t created;
289
290     /* timescale in the file */
291     int64_t time_scale;
292
293     /* length, position (time, ns) */
294     int64_t duration,
295         pos;
296
297     /* num_streams is the number of streams that av_new_stream() was called
298      * for ( = that are available to the calling program). */
299     int num_tracks, num_streams;
300     MatroskaTrack *tracks[MAX_STREAMS];
301
302     /* cache for ID peeking */
303     uint32_t peek_id;
304
305     /* byte position of the segment inside the stream */
306     offset_t segment_start;
307
308     /* The packet queue. */
309     AVPacket **packets;
310     int num_packets;
311
312     /* have we already parse metadata/cues/clusters? */
313     int metadata_parsed,
314         index_parsed,
315         done;
316
317     /* The index for seeking. */
318     int num_indexes;
319     MatroskaDemuxIndex *index;
320 } MatroskaDemuxContext;
321
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. A Matroska file really is
 * just an EBML file.
 */
327
328 /*
329  * Return: the amount of levels in the hierarchy that the
330  * current element lies higher than the previous one.
331  * The opposite isn't done - that's auto-done using master
332  * element reading.
333  */
334
335 static int
336 ebml_read_element_level_up (MatroskaDemuxContext *matroska)
337 {
338     ByteIOContext *pb = &matroska->ctx->pb;
339     offset_t pos = url_ftell(pb);
340     int num = 0;
341
342     while (matroska->num_levels > 0) {
343         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
344
345         if (pos >= level->start + level->length) {
346             matroska->num_levels--;
347             num++;
348         } else {
349             break;
350         }
351     }
352
353     return num;
354 }
355
356 /*
357  * Read: an "EBML number", which is defined as a variable-length
358  * array of bytes. The first byte indicates the length by giving a
359  * number of 0-bits followed by a one. The position of the first
360  * "one" bit inside the first byte indicates the length of this
361  * number.
362  * Returns: num. of bytes read. < 0 on error.
363  */
364
365 static int
366 ebml_read_num (MatroskaDemuxContext *matroska,
367                int                   max_size,
368                uint64_t             *number)
369 {
370     ByteIOContext *pb = &matroska->ctx->pb;
371     int len_mask = 0x80, read = 1, n = 1;
372     int64_t total = 0;
373
374     /* the first byte tells us the length in bytes - get_byte() can normally
375      * return 0, but since that's not a valid first ebmlID byte, we can
376      * use it safely here to catch EOS. */
377     if (!(total = get_byte(pb))) {
378         /* we might encounter EOS here */
379         if (!url_feof(pb)) {
380             offset_t pos = url_ftell(pb);
381             av_log(matroska->ctx, AV_LOG_ERROR,
382                    "Read error at pos. %llu (0x%llx)\n",
383                    pos, pos);
384         }
385         return AVERROR_IO; /* EOS or actual I/O error */
386     }
387
388     /* get the length of the EBML number */
389     while (read <= max_size && !(total & len_mask)) {
390         read++;
391         len_mask >>= 1;
392     }
393     if (read > max_size) {
394         offset_t pos = url_ftell(pb) - 1;
395         av_log(matroska->ctx, AV_LOG_ERROR,
396                "Invalid EBML number size tag 0x%02x at pos %llu (0x%llx)\n",
397                (uint8_t) total, pos, pos);
398         return AVERROR_INVALIDDATA;
399     }
400
401     /* read out length */
402     total &= ~len_mask;
403     while (n++ < read)
404         total = (total << 8) | get_byte(pb);
405
406     *number = total;
407
408     return read;
409 }
410
411 /*
412  * Read: the element content data ID.
413  * Return: the number of bytes read or < 0 on error.
414  */
415
416 static int
417 ebml_read_element_id (MatroskaDemuxContext *matroska,
418                       uint32_t             *id,
419                       int                  *level_up)
420 {
421     int read;
422     uint64_t total;
423
424     /* if we re-call this, use our cached ID */
425     if (matroska->peek_id != 0) {
426         if (level_up)
427             *level_up = 0;
428         *id = matroska->peek_id;
429         return 0;
430     }
431
432     /* read out the "EBML number", include tag in ID */
433     if ((read = ebml_read_num(matroska, 4, &total)) < 0)
434         return read;
435     *id = matroska->peek_id  = total | (1 << (read * 7));
436
437     /* level tracking */
438     if (level_up)
439         *level_up = ebml_read_element_level_up(matroska);
440
441     return read;
442 }
443
444 /*
445  * Read: element content length.
446  * Return: the number of bytes read or < 0 on error.
447  */
448
449 static int
450 ebml_read_element_length (MatroskaDemuxContext *matroska,
451                           uint64_t             *length)
452 {
453     /* clear cache since we're now beyond that data point */
454     matroska->peek_id = 0;
455
456     /* read out the "EBML number", include tag in ID */
457     return ebml_read_num(matroska, 8, length);
458 }
459
460 /*
461  * Return: the ID of the next element, or 0 on error.
462  * Level_up contains the amount of levels that this
463  * next element lies higher than the previous one.
464  */
465
466 static uint32_t
467 ebml_peek_id (MatroskaDemuxContext *matroska,
468               int                  *level_up)
469 {
470     uint32_t id;
471
472     assert(level_up != NULL);
473
474     if (ebml_read_element_id(matroska, &id, level_up) < 0)
475         return 0;
476
477     return id;
478 }
479
480 /*
481  * Seek to a given offset.
482  * 0 is success, -1 is failure.
483  */
484
485 static int
486 ebml_read_seek (MatroskaDemuxContext *matroska,
487                 offset_t              offset)
488 {
489     ByteIOContext *pb = &matroska->ctx->pb;
490
491     /* clear ID cache, if any */
492     matroska->peek_id = 0;
493
494     return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
495 }
496
497 /*
498  * Skip the next element.
499  * 0 is success, -1 is failure.
500  */
501
502 static int
503 ebml_read_skip (MatroskaDemuxContext *matroska)
504 {
505     ByteIOContext *pb = &matroska->ctx->pb;
506     uint32_t id;
507     uint64_t length;
508     int res;
509
510     if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
511         (res = ebml_read_element_length(matroska, &length)) < 0)
512         return res;
513
514     url_fskip(pb, length);
515
516     return 0;
517 }
518
519 /*
520  * Read the next element as an unsigned int.
521  * 0 is success, < 0 is failure.
522  */
523
524 static int
525 ebml_read_uint (MatroskaDemuxContext *matroska,
526                 uint32_t             *id,
527                 uint64_t             *num)
528 {
529     ByteIOContext *pb = &matroska->ctx->pb;
530     int n = 0, size, res;
531     uint64_t rlength;
532
533     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
534         (res = ebml_read_element_length(matroska, &rlength)) < 0)
535         return res;
536     size = rlength;
537     if (size < 1 || size > 8) {
538         offset_t pos = url_ftell(pb);
539         av_log(matroska->ctx, AV_LOG_ERROR,
540                "Invalid uint element size %d at position %lld (0x%llx)\n",
541                 size, pos, pos);
542         return AVERROR_INVALIDDATA;
543     }
544
545     /* big-endian ordening; build up number */
546     *num = 0;
547     while (n++ < size)
548         *num = (*num << 8) | get_byte(pb);
549
550     return 0;
551 }
552
553 /*
554  * Read the next element as a signed int.
555  * 0 is success, < 0 is failure.
556  */
557
558 static int
559 ebml_read_sint (MatroskaDemuxContext *matroska,
560                 uint32_t             *id,
561                 int64_t              *num)
562 {
563     ByteIOContext *pb = &matroska->ctx->pb;
564     int size, n = 1, negative = 0, res;
565     uint64_t rlength;
566
567     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
568         (res = ebml_read_element_length(matroska, &rlength)) < 0)
569         return res;
570     size = rlength;
571     if (size < 1 || size > 8) {
572         offset_t pos = url_ftell(pb);
573         av_log(matroska->ctx, AV_LOG_ERROR,
574                "Invalid sint element size %d at position %lld (0x%llx)\n",
575                 size, pos, pos);
576         return AVERROR_INVALIDDATA;
577     }
578     if ((*num = get_byte(pb)) & 0x80) {
579         negative = 1;
580         *num &= ~0x80;
581     }
582     *num = 0;
583     while (n++ < size)
584         *num = (*num << 8) | get_byte(pb);
585
586     /* make signed */
587     if (negative)
588         *num = *num - (1LL << ((8 * size) - 1));
589
590     return 0;
591 }
592
593 /*
594  * Read the next element as a float.
595  * 0 is success, < 0 is failure.
596  */
597
598 static int
599 ebml_read_float (MatroskaDemuxContext *matroska,
600                  uint32_t             *id,
601                  double               *num)
602 {
603     ByteIOContext *pb = &matroska->ctx->pb;
604     int size, res;
605     uint64_t rlength;
606
607     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
608         (res = ebml_read_element_length(matroska, &rlength)) < 0)
609         return res;
610     size = rlength;
611
612     if (size != 4 && size != 8 && size != 10) {
613         offset_t pos = url_ftell(pb);
614         av_log(matroska->ctx, AV_LOG_ERROR,
615                "Invalid float element size %d at position %llu (0x%llx)\n",
616                size, pos, pos);
617         return AVERROR_INVALIDDATA;
618     }
619     if (size == 10) {
620         av_log(matroska->ctx, AV_LOG_ERROR,
621                "FIXME! 10-byte floats unimplemented\n");
622         return AVERROR_UNKNOWN;
623     }
624
625     if (size == 4) {
626         float f;
627
628         while (size-- > 0)
629 #ifdef WORDS_BIGENDIAN
630             ((uint8_t *) &f)[3 - size] = get_byte(pb);
631 #else
632             ((uint8_t *) &f)[size] = get_byte(pb);
633 #endif
634
635         *num = f;
636     } else {
637         double d;
638
639         while (size-- > 0)
640 #ifdef WORDS_BIGENDIAN
641             ((uint8_t *) &d)[7 - size] = get_byte(pb);
642 #else
643             ((uint8_t *) &d)[size] = get_byte(pb);
644 #endif
645
646         *num = d;
647     }
648
649     return 0;
650 }
651
652 /*
653  * Read the next element as an ASCII string.
654  * 0 is success, < 0 is failure.
655  */
656
657 static int
658 ebml_read_ascii (MatroskaDemuxContext *matroska,
659                  uint32_t             *id,
660                  char                **str)
661 {
662     ByteIOContext *pb = &matroska->ctx->pb;
663     int size, res;
664     uint64_t rlength;
665
666     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
667         (res = ebml_read_element_length(matroska, &rlength)) < 0)
668         return res;
669     size = rlength;
670
671     /* ebml strings are usually not 0-terminated, so we allocate one
672      * byte more, read the string and NULL-terminate it ourselves. */
673     if (size < 0 || !(*str = av_malloc(size + 1))) {
674         av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
675         return AVERROR_NOMEM;
676     }
677     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
678         offset_t pos = url_ftell(pb);
679         av_log(matroska->ctx, AV_LOG_ERROR,
680                "Read error at pos. %llu (0x%llx)\n", pos, pos);
681         return AVERROR_IO;
682     }
683     (*str)[size] = '\0';
684
685     return 0;
686 }
687
688 /*
689  * Read the next element as a UTF-8 string.
690  * 0 is success, < 0 is failure.
691  */
692
693 static int
694 ebml_read_utf8 (MatroskaDemuxContext *matroska,
695                 uint32_t             *id,
696                 char                **str)
697 {
698   return ebml_read_ascii(matroska, id, str);
699 }
700
701 /*
702  * Read the next element as a date (nanoseconds since 1/1/2000).
703  * 0 is success, < 0 is failure.
704  */
705
706 static int
707 ebml_read_date (MatroskaDemuxContext *matroska,
708                 uint32_t             *id,
709                 int64_t              *date)
710 {
711   return ebml_read_sint(matroska, id, date);
712 }
713
714 /*
715  * Read the next element, but only the header. The contents
716  * are supposed to be sub-elements which can be read separately.
717  * 0 is success, < 0 is failure.
718  */
719
720 static int
721 ebml_read_master (MatroskaDemuxContext *matroska,
722                   uint32_t             *id)
723 {
724     ByteIOContext *pb = &matroska->ctx->pb;
725     uint64_t length;
726     MatroskaLevel *level;
727     int res;
728
729     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
730         (res = ebml_read_element_length(matroska, &length)) < 0)
731         return res;
732
733     /* protect... (Heaven forbids that the '>' is true) */
734     if (matroska->num_levels >= EBML_MAX_DEPTH) {
735         av_log(matroska->ctx, AV_LOG_ERROR,
736                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
737         return AVERROR_NOTSUPP;
738     }
739
740     /* remember level */
741     level = &matroska->levels[matroska->num_levels++];
742     level->start = url_ftell(pb);
743     level->length = length;
744
745     return 0;
746 }
747
748 /*
749  * Read the next element as binary data.
750  * 0 is success, < 0 is failure.
751  */
752
753 static int
754 ebml_read_binary (MatroskaDemuxContext *matroska,
755                   uint32_t             *id,
756                   uint8_t             **binary,
757                   int                  *size)
758 {
759     ByteIOContext *pb = &matroska->ctx->pb;
760     uint64_t rlength;
761     int res;
762
763     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
764         (res = ebml_read_element_length(matroska, &rlength)) < 0)
765         return res;
766     *size = rlength;
767
768     if (!(*binary = av_malloc(*size))) {
769         av_log(matroska->ctx, AV_LOG_ERROR,
770                "Memory allocation error\n");
771         return AVERROR_NOMEM;
772     }
773
774     if (get_buffer(pb, *binary, *size) != *size) {
775         offset_t pos = url_ftell(pb);
776         av_log(matroska->ctx, AV_LOG_ERROR,
777                "Read error at pos. %llu (0x%llx)\n", pos, pos);
778         return AVERROR_IO;
779     }
780
781     return 0;
782 }
783
784 /*
785  * Read signed/unsigned "EBML" numbers.
786  * Return: number of bytes processed, < 0 on error.
787  * XXX: use ebml_read_num().
788  */
789
790 static int
791 matroska_ebmlnum_uint (uint8_t  *data,
792                        uint32_t  size,
793                        uint64_t *num)
794 {
795     int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
796     uint64_t total;
797
798     if (size <= 0)
799         return AVERROR_INVALIDDATA;
800
801     total = data[0];
802     while (read <= 8 && !(total & len_mask)) {
803         read++;
804         len_mask >>= 1;
805     }
806     if (read > 8)
807         return AVERROR_INVALIDDATA;
808
809     if ((total &= (len_mask - 1)) == len_mask - 1)
810         num_ffs++;
811     if (size < read)
812         return AVERROR_INVALIDDATA;
813     while (n < read) {
814         if (data[n] == 0xff)
815             num_ffs++;
816         total = (total << 8) | data[n];
817         n++;
818     }
819
820     if (!total)
821         return AVERROR_INVALIDDATA;
822
823     if (read == num_ffs)
824         *num = (uint64_t)-1;
825     else
826         *num = total;
827
828     return read;
829 }
830
/*
 * Decode a signed "EBML number" from a memory buffer. The number is
 * decoded as unsigned first and then shifted into the signed range by
 * subtracting 2^(7*len - 1) - 1. The reserved all-ones value maps to
 * INT64_MAX.
 * Return: number of bytes processed, < 0 on error.
 */

static int
matroska_ebmlnum_sint (uint8_t  *data,
                       uint32_t  size,
                       int64_t  *num)
{
    uint64_t raw;
    int len;

    len = matroska_ebmlnum_uint(data, size, &raw);
    if (len < 0)
        return len;

    if (raw != (uint64_t) -1) {
        /* re-centre the unsigned value around zero */
        *num = raw - ((1LL << ((7 * len) - 1)) - 1);
    } else {
        *num = INT64_MAX;
    }

    return len;
}
855
856 /*
857  * Read an EBML header.
858  * 0 is success, < 0 is failure.
859  */
860
861 static int
862 ebml_read_header (MatroskaDemuxContext *matroska,
863                   char                **doctype,
864                   int                  *version)
865 {
866     uint32_t id;
867     int level_up, res = 0;
868
869     /* default init */
870     if (doctype)
871         *doctype = NULL;
872     if (version)
873         *version = 1;
874
875     if (!(id = ebml_peek_id(matroska, &level_up)) ||
876         level_up != 0 || id != EBML_ID_HEADER) {
877         av_log(matroska->ctx, AV_LOG_ERROR,
878                "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
879         return AVERROR_INVALIDDATA;
880     }
881     if ((res = ebml_read_master(matroska, &id)) < 0)
882         return res;
883
884     while (res == 0) {
885         if (!(id = ebml_peek_id(matroska, &level_up)))
886             return AVERROR_IO;
887
888         /* end-of-header */
889         if (level_up)
890             break;
891
892         switch (id) {
893             /* is our read version uptodate? */
894             case EBML_ID_EBMLREADVERSION: {
895                 uint64_t num;
896
897                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
898                     return res;
899                 if (num > EBML_VERSION) {
900                     av_log(matroska->ctx, AV_LOG_ERROR,
901                            "EBML version %llu (> %d) is not supported\n",
902                            num, EBML_VERSION);
903                     return AVERROR_INVALIDDATA;
904                 }
905                 break;
906             }
907
908             /* we only handle 8 byte lengths at max */
909             case EBML_ID_EBMLMAXSIZELENGTH: {
910                 uint64_t num;
911
912                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
913                     return res;
914                 if (num > sizeof(uint64_t)) {
915                     av_log(matroska->ctx, AV_LOG_ERROR,
916                            "Integers of size %llu (> %d) not supported\n",
917                            num, sizeof(uint64_t));
918                     return AVERROR_INVALIDDATA;
919                 }
920                 break;
921             }
922
923             /* we handle 4 byte IDs at max */
924             case EBML_ID_EBMLMAXIDLENGTH: {
925                 uint64_t num;
926
927                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
928                     return res;
929                 if (num > sizeof(uint32_t)) {
930                     av_log(matroska->ctx, AV_LOG_ERROR,
931                            "IDs of size %llu (> %u) not supported\n",
932                             num, sizeof(uint32_t));
933                     return AVERROR_INVALIDDATA;
934                 }
935                 break;
936             }
937
938             case EBML_ID_DOCTYPE: {
939                 char *text;
940
941                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
942                     return res;
943                 if (doctype) {
944                     if (*doctype)
945                         av_free(*doctype);
946                     *doctype = text;
947                 } else
948                     av_free(text);
949                 break;
950             }
951
952             case EBML_ID_DOCTYPEREADVERSION: {
953                 uint64_t num;
954
955                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
956                     return res;
957                 if (version)
958                     *version = num;
959                 break;
960             }
961
962             default:
963                 av_log(matroska->ctx, AV_LOG_INFO,
964                        "Unknown data type 0x%x in EBML header", id);
965                 /* pass-through */
966
967             case EBML_ID_VOID:
968             /* we ignore these two, as they don't tell us anything we
969              * care about */
970             case EBML_ID_EBMLVERSION:
971             case EBML_ID_DOCTYPEVERSION:
972                 res = ebml_read_skip (matroska);
973                 break;
974         }
975     }
976
977     return 0;
978 }
979
980 /*
981  * Put one packet in an application-supplied AVPacket struct.
982  * Returns 0 on success or -1 on failure.
983  */
984
985 static int
986 matroska_deliver_packet (MatroskaDemuxContext *matroska,
987                          AVPacket             *pkt)
988 {
989     if (matroska->num_packets > 0) {
990         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
991         av_free(matroska->packets[0]);
992         if (matroska->num_packets > 1) {
993             memmove(&matroska->packets[0], &matroska->packets[1],
994                     (matroska->num_packets - 1) * sizeof(AVPacket *));
995             matroska->packets =
996                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
997                            sizeof(AVPacket *));
998         } else {
999             av_free(matroska->packets);
1000             matroska->packets = NULL;
1001         }
1002         matroska->num_packets--;
1003         return 0;
1004     }
1005
1006     return -1;
1007 }
1008
1009 /*
1010  * Put a packet into our internal queue. Will be delivered to the
1011  * user/application during the next get_packet() call.
1012  */
1013
1014 static void
1015 matroska_queue_packet (MatroskaDemuxContext *matroska,
1016                        AVPacket             *pkt)
1017 {
1018     matroska->packets =
1019         av_realloc(matroska->packets, (matroska->num_packets + 1) *
1020                    sizeof(AVPacket *));
1021     matroska->packets[matroska->num_packets] = pkt;
1022     matroska->num_packets++;
1023 }
1024
1025 /*
1026  * Autodetecting...
1027  */
1028
1029 static int
1030 matroska_probe (AVProbeData *p)
1031 {
1032     uint64_t total = 0;
1033     int len_mask = 0x80, size = 1, n = 1;
1034     uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1035
1036     if (p->buf_size < 5)
1037         return 0;
1038
1039     /* ebml header? */
1040     if ((p->buf[0] << 24 | p->buf[1] << 16 |
1041          p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
1042         return 0;
1043
1044     /* length of header */
1045     total = p->buf[4];
1046     while (size <= 8 && !(total & len_mask)) {
1047         size++;
1048         len_mask >>= 1;
1049     }
1050     if (size > 8)
1051       return 0;
1052     total &= (len_mask - 1);
1053     while (n < size)
1054         total = (total << 8) | p->buf[4 + n++];
1055
1056     /* does the probe data contain the whole header? */
1057     if (p->buf_size < 4 + size + total)
1058       return 0;
1059
1060     /* the header must contain the document type 'matroska'. For now,
1061      * we don't parse the whole header but simply check for the
1062      * availability of that array of characters inside the header.
1063      * Not fully fool-proof, but good enough. */
1064     for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
1065         if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
1066             return AVPROBE_SCORE_MAX;
1067
1068     return 0;
1069 }
1070
1071 /*
1072  * From here on, it's all XML-style DTD stuff... Needs no comments.
1073  */
1074
1075 static int
1076 matroska_parse_info (MatroskaDemuxContext *matroska)
1077 {
1078     int res = 0;
1079     uint32_t id;
1080
1081     av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
1082
1083     while (res == 0) {
1084         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1085             res = AVERROR_IO;
1086             break;
1087         } else if (matroska->level_up) {
1088             matroska->level_up--;
1089             break;
1090         }
1091
1092         switch (id) {
1093             /* cluster timecode */
1094             case MATROSKA_ID_TIMECODESCALE: {
1095                 uint64_t num;
1096                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1097                     break;
1098                 matroska->time_scale = num;
1099                 break;
1100             }
1101
1102             case MATROSKA_ID_DURATION: {
1103                 double num;
1104                 if ((res = ebml_read_float(matroska, &id, &num)) < 0)
1105                     break;
1106                 matroska->duration = num * matroska->time_scale;
1107                 break;
1108             }
1109
1110             case MATROSKA_ID_WRITINGAPP: {
1111                 char *text;
1112                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1113                     break;
1114                 matroska->writing_app = text;
1115                 break;
1116             }
1117
1118             case MATROSKA_ID_MUXINGAPP: {
1119                 char *text;
1120                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1121                     break;
1122                 matroska->muxing_app = text;
1123                 break;
1124             }
1125
1126             case MATROSKA_ID_DATEUTC: {
1127                 int64_t time;
1128                 if ((res = ebml_read_date(matroska, &id, &time)) < 0)
1129                     break;
1130                 matroska->created = time;
1131                 break;
1132             }
1133
1134             default:
1135                 av_log(matroska->ctx, AV_LOG_INFO,
1136                        "Unknown entry 0x%x in info header\n", id);
1137                 /* fall-through */
1138
1139             case EBML_ID_VOID:
1140                 res = ebml_read_skip(matroska);
1141                 break;
1142         }
1143
1144         if (matroska->level_up) {
1145             matroska->level_up--;
1146             break;
1147         }
1148     }
1149
1150     return res;
1151 }
1152
1153 static int
1154 matroska_add_stream (MatroskaDemuxContext *matroska)
1155 {
1156     int res = 0;
1157     uint32_t id;
1158     MatroskaTrack *track;
1159
1160     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1161
1162     /* Allocate a generic track. As soon as we know its type we'll realloc. */
1163     track = av_mallocz(sizeof(MatroskaTrack));
1164     matroska->num_tracks++;
1165
1166     /* start with the master */
1167     if ((res = ebml_read_master(matroska, &id)) < 0)
1168         return res;
1169
1170     /* try reading the trackentry headers */
1171     while (res == 0) {
1172         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1173             res = AVERROR_IO;
1174             break;
1175         } else if (matroska->level_up > 0) {
1176             matroska->level_up--;
1177             break;
1178         }
1179
1180         switch (id) {
1181             /* track number (unique stream ID) */
1182             case MATROSKA_ID_TRACKNUMBER: {
1183                 uint64_t num;
1184                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1185                     break;
1186                 track->num = num;
1187                 break;
1188             }
1189
1190             /* track UID (unique identifier) */
1191             case MATROSKA_ID_TRACKUID: {
1192                 uint64_t num;
1193                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1194                     break;
1195                 track->uid = num;
1196                 break;
1197             }
1198
1199             /* track type (video, audio, combined, subtitle, etc.) */
1200             case MATROSKA_ID_TRACKTYPE: {
1201                 uint64_t num;
1202                 if (track->type != 0) {
1203                     av_log(matroska->ctx, AV_LOG_INFO,
1204                            "More than one tracktype in an entry - skip\n");
1205                     break;
1206                 }
1207                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1208                     break;
1209                 track->type = num;
1210
1211                 /* ok, so we're actually going to reallocate this thing */
1212                 switch (track->type) {
1213                     case MATROSKA_TRACK_TYPE_VIDEO:
1214                         track = (MatroskaTrack *)
1215                             av_realloc(track, sizeof(MatroskaVideoTrack));
1216                         break;
1217                     case MATROSKA_TRACK_TYPE_AUDIO:
1218                         track = (MatroskaTrack *)
1219                             av_realloc(track, sizeof(MatroskaAudioTrack));
1220                         ((MatroskaAudioTrack *)track)->channels = 1;
1221                         ((MatroskaAudioTrack *)track)->samplerate = 8000;
1222                         break;
1223                     case MATROSKA_TRACK_TYPE_SUBTITLE:
1224                         track = (MatroskaTrack *)
1225                             av_realloc(track, sizeof(MatroskaSubtitleTrack));
1226                         break;
1227                     case MATROSKA_TRACK_TYPE_COMPLEX:
1228                     case MATROSKA_TRACK_TYPE_LOGO:
1229                     case MATROSKA_TRACK_TYPE_CONTROL:
1230                     default:
1231                         av_log(matroska->ctx, AV_LOG_INFO,
1232                                "Unknown or unsupported track type 0x%x\n",
1233                                track->type);
1234                         track->type = 0;
1235                         break;
1236                 }
1237                 matroska->tracks[matroska->num_tracks - 1] = track;
1238                 break;
1239             }
1240
1241             /* tracktype specific stuff for video */
1242             case MATROSKA_ID_TRACKVIDEO: {
1243                 MatroskaVideoTrack *videotrack;
1244                 if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1245                     av_log(matroska->ctx, AV_LOG_INFO,
1246                            "video data in non-video track - ignoring\n");
1247                     res = AVERROR_INVALIDDATA;
1248                     break;
1249                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1250                     break;
1251                 videotrack = (MatroskaVideoTrack *)track;
1252
1253                 while (res == 0) {
1254                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1255                         res = AVERROR_IO;
1256                         break;
1257                     } else if (matroska->level_up > 0) {
1258                         matroska->level_up--;
1259                         break;
1260                     }
1261
1262                     switch (id) {
1263                         /* fixme, this should be one-up, but I get it here */
1264                         case MATROSKA_ID_TRACKDEFAULTDURATION: {
1265                             uint64_t num;
1266                             if ((res = ebml_read_uint (matroska, &id,
1267                                                        &num)) < 0)
1268                                 break;
1269                             track->default_duration = num;
1270                             break;
1271                         }
1272
1273                         /* video framerate */
1274                         case MATROSKA_ID_VIDEOFRAMERATE: {
1275                             double num;
1276                             if ((res = ebml_read_float(matroska, &id,
1277                                                        &num)) < 0)
1278                                 break;
1279                             track->default_duration = 1000000000 * (1. / num);
1280                             break;
1281                         }
1282
1283                         /* width of the size to display the video at */
1284                         case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1285                             uint64_t num;
1286                             if ((res = ebml_read_uint(matroska, &id,
1287                                                       &num)) < 0)
1288                                 break;
1289                             videotrack->display_width = num;
1290                             break;
1291                         }
1292
1293                         /* height of the size to display the video at */
1294                         case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1295                             uint64_t num;
1296                             if ((res = ebml_read_uint(matroska, &id,
1297                                                       &num)) < 0)
1298                                 break;
1299                             videotrack->display_height = num;
1300                             break;
1301                         }
1302
1303                         /* width of the video in the file */
1304                         case MATROSKA_ID_VIDEOPIXELWIDTH: {
1305                             uint64_t num;
1306                             if ((res = ebml_read_uint(matroska, &id,
1307                                                       &num)) < 0)
1308                                 break;
1309                             videotrack->pixel_width = num;
1310                             break;
1311                         }
1312
1313                         /* height of the video in the file */
1314                         case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1315                             uint64_t num;
1316                             if ((res = ebml_read_uint(matroska, &id,
1317                                                       &num)) < 0)
1318                                 break;
1319                             videotrack->pixel_height = num;
1320                             break;
1321                         }
1322
1323                         /* whether the video is interlaced */
1324                         case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1325                             uint64_t num;
1326                             if ((res = ebml_read_uint(matroska, &id,
1327                                                       &num)) < 0)
1328                                 break;
1329                             if (num)
1330                                 track->flags |=
1331                                     MATROSKA_VIDEOTRACK_INTERLACED;
1332                             else
1333                                 track->flags &=
1334                                     ~MATROSKA_VIDEOTRACK_INTERLACED;
1335                             break;
1336                         }
1337
1338                         /* stereo mode (whether the video has two streams,
1339                          * where one is for the left eye and the other for
1340                          * the right eye, which creates a 3D-like
1341                          * effect) */
1342                         case MATROSKA_ID_VIDEOSTEREOMODE: {
1343                             uint64_t num;
1344                             if ((res = ebml_read_uint(matroska, &id,
1345                                                       &num)) < 0)
1346                                 break;
1347                             if (num != MATROSKA_EYE_MODE_MONO &&
1348                                 num != MATROSKA_EYE_MODE_LEFT &&
1349                                 num != MATROSKA_EYE_MODE_RIGHT &&
1350                                 num != MATROSKA_EYE_MODE_BOTH) {
1351                                 av_log(matroska->ctx, AV_LOG_INFO,
1352                                        "Ignoring unknown eye mode 0x%x\n",
1353                                        (uint32_t) num);
1354                                 break;
1355                             }
1356                             videotrack->eye_mode = num;
1357                             break;
1358                         }
1359
1360                         /* aspect ratio behaviour */
1361                         case MATROSKA_ID_VIDEOASPECTRATIO: {
1362                             uint64_t num;
1363                             if ((res = ebml_read_uint(matroska, &id,
1364                                                       &num)) < 0)
1365                                 break;
1366                             if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1367                                 num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1368                                 num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1369                                 av_log(matroska->ctx, AV_LOG_INFO,
1370                                        "Ignoring unknown aspect ratio 0x%x\n",
1371                                        (uint32_t) num);
1372                                 break;
1373                             }
1374                             videotrack->ar_mode = num;
1375                             break;
1376                         }
1377
1378                         /* colourspace (only matters for raw video)
1379                          * fourcc */
1380                         case MATROSKA_ID_VIDEOCOLOURSPACE: {
1381                             uint64_t num;
1382                             if ((res = ebml_read_uint(matroska, &id,
1383                                                       &num)) < 0)
1384                                 break;
1385                             videotrack->fourcc = num;
1386                             break;
1387                         }
1388
1389                         default:
1390                             av_log(matroska->ctx, AV_LOG_INFO,
1391                                    "Unknown video track header entry "
1392                                    "0x%x - ignoring\n", id);
1393                             /* pass-through */
1394
1395                         case EBML_ID_VOID:
1396                             res = ebml_read_skip(matroska);
1397                             break;
1398                     }
1399
1400                     if (matroska->level_up) {
1401                         matroska->level_up--;
1402                         break;
1403                     }
1404                 }
1405                 break;
1406             }
1407
1408             /* tracktype specific stuff for audio */
1409             case MATROSKA_ID_TRACKAUDIO: {
1410                 MatroskaAudioTrack *audiotrack;
1411                 if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1412                     av_log(matroska->ctx, AV_LOG_INFO,
1413                            "audio data in non-audio track - ignoring\n");
1414                     res = AVERROR_INVALIDDATA;
1415                     break;
1416                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1417                     break;
1418                 audiotrack = (MatroskaAudioTrack *)track;
1419
1420                 while (res == 0) {
1421                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1422                         res = AVERROR_IO;
1423                         break;
1424                     } else if (matroska->level_up > 0) {
1425                         matroska->level_up--;
1426                         break;
1427                     }
1428
1429                     switch (id) {
1430                         /* samplerate */
1431                         case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1432                             double num;
1433                             if ((res = ebml_read_float(matroska, &id,
1434                                                        &num)) < 0)
1435                                 break;
1436                             audiotrack->samplerate = num;
1437                             break;
1438                         }
1439
1440                             /* bitdepth */
1441                         case MATROSKA_ID_AUDIOBITDEPTH: {
1442                             uint64_t num;
1443                             if ((res = ebml_read_uint(matroska, &id,
1444                                                       &num)) < 0)
1445                                 break;
1446                             audiotrack->bitdepth = num;
1447                             break;
1448                         }
1449
1450                             /* channels */
1451                         case MATROSKA_ID_AUDIOCHANNELS: {
1452                             uint64_t num;
1453                             if ((res = ebml_read_uint(matroska, &id,
1454                                                       &num)) < 0)
1455                                 break;
1456                             audiotrack->channels = num;
1457                             break;
1458                         }
1459
1460                         default:
1461                             av_log(matroska->ctx, AV_LOG_INFO,
1462                                    "Unknown audio track header entry "
1463                                    "0x%x - ignoring\n", id);
1464                             /* pass-through */
1465
1466                         case EBML_ID_VOID:
1467                             res = ebml_read_skip(matroska);
1468                             break;
1469                     }
1470
1471                     if (matroska->level_up) {
1472                         matroska->level_up--;
1473                         break;
1474                     }
1475                 }
1476                 break;
1477             }
1478
1479                 /* codec identifier */
1480             case MATROSKA_ID_CODECID: {
1481                 char *text;
1482                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1483                     break;
1484                 track->codec_id = text;
1485                 break;
1486             }
1487
1488                 /* codec private data */
1489             case MATROSKA_ID_CODECPRIVATE: {
1490                 uint8_t *data;
1491                 int size;
1492                 if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1493                     break;
1494                 track->codec_priv = data;
1495                 track->codec_priv_size = size;
1496                 break;
1497             }
1498
1499                 /* name of the codec */
1500             case MATROSKA_ID_CODECNAME: {
1501                 char *text;
1502                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1503                     break;
1504                 track->codec_name = text;
1505                 break;
1506             }
1507
1508                 /* name of this track */
1509             case MATROSKA_ID_TRACKNAME: {
1510                 char *text;
1511                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1512                     break;
1513                 track->name = text;
1514                 break;
1515             }
1516
1517                 /* language (matters for audio/subtitles, mostly) */
1518             case MATROSKA_ID_TRACKLANGUAGE: {
1519                 char *text;
1520                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1521                     break;
1522                 track->language = text;
1523                 break;
1524             }
1525
1526                 /* whether this is actually used */
1527             case MATROSKA_ID_TRACKFLAGENABLED: {
1528                 uint64_t num;
1529                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1530                     break;
1531                 if (num)
1532                     track->flags |= MATROSKA_TRACK_ENABLED;
1533                 else
1534                     track->flags &= ~MATROSKA_TRACK_ENABLED;
1535                 break;
1536             }
1537
1538                 /* whether it's the default for this track type */
1539             case MATROSKA_ID_TRACKFLAGDEFAULT: {
1540                 uint64_t num;
1541                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1542                     break;
1543                 if (num)
1544                     track->flags |= MATROSKA_TRACK_DEFAULT;
1545                 else
1546                     track->flags &= ~MATROSKA_TRACK_DEFAULT;
1547                 break;
1548             }
1549
1550                 /* lacing (like MPEG, where blocks don't end/start on frame
1551                  * boundaries) */
1552             case MATROSKA_ID_TRACKFLAGLACING: {
1553                 uint64_t num;
1554                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1555                     break;
1556                 if (num)
1557                     track->flags |= MATROSKA_TRACK_LACING;
1558                 else
1559                     track->flags &= ~MATROSKA_TRACK_LACING;
1560                 break;
1561             }
1562
1563                 /* default length (in time) of one data block in this track */
1564             case MATROSKA_ID_TRACKDEFAULTDURATION: {
1565                 uint64_t num;
1566                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1567                     break;
1568                 track->default_duration = num;
1569                 break;
1570             }
1571
1572             default:
1573                 av_log(matroska->ctx, AV_LOG_INFO,
1574                        "Unknown track header entry 0x%x - ignoring\n", id);
1575                 /* pass-through */
1576
1577             case EBML_ID_VOID:
1578             /* we ignore these because they're nothing useful. */
1579             case MATROSKA_ID_CODECINFOURL:
1580             case MATROSKA_ID_CODECDOWNLOADURL:
1581             case MATROSKA_ID_TRACKMINCACHE:
1582             case MATROSKA_ID_TRACKMAXCACHE:
1583                 res = ebml_read_skip(matroska);
1584                 break;
1585         }
1586
1587         if (matroska->level_up) {
1588             matroska->level_up--;
1589             break;
1590         }
1591     }
1592
1593     return res;
1594 }
1595
1596 static int
1597 matroska_parse_tracks (MatroskaDemuxContext *matroska)
1598 {
1599     int res = 0;
1600     uint32_t id;
1601
1602     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1603
1604     while (res == 0) {
1605         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1606             res = AVERROR_IO;
1607             break;
1608         } else if (matroska->level_up) {
1609             matroska->level_up--;
1610             break;
1611         }
1612
1613         switch (id) {
1614             /* one track within the "all-tracks" header */
1615             case MATROSKA_ID_TRACKENTRY:
1616                 res = matroska_add_stream(matroska);
1617                 break;
1618
1619             default:
1620                 av_log(matroska->ctx, AV_LOG_INFO,
1621                        "Unknown entry 0x%x in track header\n", id);
1622                 /* fall-through */
1623
1624             case EBML_ID_VOID:
1625                 res = ebml_read_skip(matroska);
1626                 break;
1627         }
1628
1629         if (matroska->level_up) {
1630             matroska->level_up--;
1631             break;
1632         }
1633     }
1634
1635     return res;
1636 }
1637
1638 static int
1639 matroska_parse_index (MatroskaDemuxContext *matroska)
1640 {
1641     int res = 0;
1642     uint32_t id;
1643     MatroskaDemuxIndex idx;
1644
1645     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1646
1647     while (res == 0) {
1648         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1649             res = AVERROR_IO;
1650             break;
1651         } else if (matroska->level_up) {
1652             matroska->level_up--;
1653             break;
1654         }
1655
1656         switch (id) {
1657             /* one single index entry ('point') */
1658             case MATROSKA_ID_POINTENTRY:
1659                 if ((res = ebml_read_master(matroska, &id)) < 0)
1660                     break;
1661
1662                 /* in the end, we hope to fill one entry with a
1663                  * timestamp, a file position and a tracknum */
1664                 idx.pos   = (uint64_t) -1;
1665                 idx.time  = (uint64_t) -1;
1666                 idx.track = (uint16_t) -1;
1667
1668                 while (res == 0) {
1669                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1670                         res = AVERROR_IO;
1671                         break;
1672                     } else if (matroska->level_up) {
1673                         matroska->level_up--;
1674                         break;
1675                     }
1676
1677                     switch (id) {
1678                         /* one single index entry ('point') */
1679                         case MATROSKA_ID_CUETIME: {
1680                             int64_t time;
1681                             if ((res = ebml_read_uint(matroska, &id,
1682                                                       &time)) < 0)
1683                                 break;
1684                             idx.time = time * matroska->time_scale;
1685                             break;
1686                         }
1687
1688                         /* position in the file + track to which it 
1689                          * belongs */
1690                         case MATROSKA_ID_CUETRACKPOSITION:
1691                             if ((res = ebml_read_master(matroska, &id)) < 0)
1692                                 break;
1693
1694                             while (res == 0) {
1695                                 if (!(id = ebml_peek_id (matroska,
1696                                                     &matroska->level_up))) {
1697                                     res = AVERROR_IO;
1698                                     break;
1699                                 } else if (matroska->level_up) {
1700                                     matroska->level_up--;
1701                                     break;
1702                                 }
1703
1704                                 switch (id) {
1705                                     /* track number */
1706                                     case MATROSKA_ID_CUETRACK: {
1707                                         uint64_t num;
1708                                         if ((res = ebml_read_uint(matroska,
1709                                                           &id, &num)) < 0)
1710                                             break;
1711                                         idx.track = num;
1712                                         break;
1713                                     }
1714
1715                                         /* position in file */
1716                                     case MATROSKA_ID_CUECLUSTERPOSITION: {
1717                                         uint64_t num;
1718                                         if ((res = ebml_read_uint(matroska,
1719                                                           &id, &num)) < 0)
1720                                             break;
1721                                         idx.pos = num;
1722                                         break;
1723                                     }
1724
1725                                     default:
1726                                         av_log(matroska->ctx, AV_LOG_INFO,
1727                                                "Unknown entry 0x%x in "
1728                                                "CuesTrackPositions\n", id);
1729                                         /* fall-through */
1730
1731                                     case EBML_ID_VOID:
1732                                         res = ebml_read_skip(matroska);
1733                                         break;
1734                                 }
1735
1736                                 if (matroska->level_up) {
1737                                     matroska->level_up--;
1738                                     break;
1739                                 }
1740                             }
1741
1742                             break;
1743
1744                         default:
1745                             av_log(matroska->ctx, AV_LOG_INFO,
1746                                    "Unknown entry 0x%x in cuespoint "
1747                                    "index\n", id);
1748                             /* fall-through */
1749
1750                         case EBML_ID_VOID:
1751                             res = ebml_read_skip(matroska);
1752                             break;
1753                     }
1754
1755                     if (matroska->level_up) {
1756                         matroska->level_up--;
1757                         break;
1758                     }
1759                 }
1760
1761                 /* so let's see if we got what we wanted */
1762                 if (idx.pos   != (uint64_t) -1 &&
1763                     idx.time  != (uint64_t) -1 &&
1764                     idx.track != (uint16_t) -1) {
1765                     if (matroska->num_indexes % 32 == 0) {
1766                         /* re-allocate bigger index */
1767                         matroska->index =
1768                             av_realloc(matroska->index,
1769                                        (matroska->num_indexes + 32) *
1770                                        sizeof(MatroskaDemuxIndex));
1771                     }
1772                     matroska->index[matroska->num_indexes] = idx;
1773                     matroska->num_indexes++;
1774                 }
1775                 break;
1776
1777             default:
1778                 av_log(matroska->ctx, AV_LOG_INFO,
1779                        "Unknown entry 0x%x in cues header\n", id);
1780                 /* fall-through */
1781
1782             case EBML_ID_VOID:
1783                 res = ebml_read_skip(matroska);
1784                 break;
1785         }
1786
1787         if (matroska->level_up) {
1788             matroska->level_up--;
1789             break;
1790         }
1791     }
1792
1793     return res;
1794 }
1795
1796 static int
1797 matroska_parse_metadata (MatroskaDemuxContext *matroska)
1798 {
1799     int res = 0;
1800     uint32_t id;
1801
1802     while (res == 0) {
1803         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1804             res = AVERROR_IO;
1805             break;
1806         } else if (matroska->level_up) {
1807             matroska->level_up--;
1808             break;
1809         }
1810
1811         switch (id) {
1812             /* Hm, this is unsupported... */
1813             default:
1814                 av_log(matroska->ctx, AV_LOG_INFO,
1815                        "Unknown entry 0x%x in metadata header\n", id);
1816                 /* fall-through */
1817
1818             case EBML_ID_VOID:
1819                 res = ebml_read_skip(matroska);
1820                 break;
1821         }
1822
1823         if (matroska->level_up) {
1824             matroska->level_up--;
1825             break;
1826         }
1827     }
1828
1829     return res;
1830 }
1831
1832 static int
1833 matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1834 {
1835     int res = 0;
1836     uint32_t id;
1837
1838     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1839
1840     while (res == 0) {
1841         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1842             res = AVERROR_IO;
1843             break;
1844         } else if (matroska->level_up) {
1845             matroska->level_up--;
1846             break;
1847         }
1848
1849         switch (id) {
1850             case MATROSKA_ID_SEEKENTRY: {
1851                 uint32_t seek_id = 0, peek_id_cache = 0;
1852                 uint64_t seek_pos = (uint64_t) -1, t;
1853
1854                 if ((res = ebml_read_master(matroska, &id)) < 0)
1855                     break;
1856
1857                 while (res == 0) {
1858                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1859                         res = AVERROR_IO;
1860                         break;
1861                     } else if (matroska->level_up) {
1862                         matroska->level_up--;
1863                         break;
1864                     }
1865
1866                     switch (id) {
1867                         case MATROSKA_ID_SEEKID:
1868                             res = ebml_read_uint(matroska, &id, &t);
1869                             seek_id = t;
1870                             break;
1871
1872                         case MATROSKA_ID_SEEKPOSITION:
1873                             res = ebml_read_uint(matroska, &id, &seek_pos);
1874                             break;
1875
1876                         default:
1877                             av_log(matroska->ctx, AV_LOG_INFO,
1878                                    "Unknown seekhead ID 0x%x\n", id);
1879                             /* fall-through */
1880
1881                         case EBML_ID_VOID:
1882                             res = ebml_read_skip(matroska);
1883                             break;
1884                     }
1885
1886                     if (matroska->level_up) {
1887                         matroska->level_up--;
1888                         break;
1889                     }
1890                 }
1891
1892                 if (!seek_id || seek_pos == (uint64_t) -1) {
1893                     av_log(matroska->ctx, AV_LOG_INFO,
1894                            "Incomplete seekhead entry (0x%x/%llu)\n",
1895                            seek_id, seek_pos);
1896                     break;
1897                 }
1898
1899                 switch (seek_id) {
1900                     case MATROSKA_ID_CUES:
1901                     case MATROSKA_ID_TAGS: {
1902                         uint32_t level_up = matroska->level_up;
1903                         offset_t before_pos;
1904                         uint64_t length;
1905                         MatroskaLevel level;
1906
1907                         /* remember the peeked ID and the current position */
1908                         peek_id_cache = matroska->peek_id;
1909                         before_pos = url_ftell(&matroska->ctx->pb);
1910
1911                         /* seek */
1912                         if ((res = ebml_read_seek(matroska, seek_pos +
1913                                                matroska->segment_start)) < 0)
1914                             return res;
1915
1916                         /* we don't want to lose our seekhead level, so we add
1917                          * a dummy. This is a crude hack. */
1918                         if (matroska->num_levels == EBML_MAX_DEPTH) {
1919                             av_log(matroska->ctx, AV_LOG_INFO,
1920                                    "Max EBML element depth (%d) reached, "
1921                                    "cannot parse further.\n", EBML_MAX_DEPTH);
1922                             return AVERROR_UNKNOWN;
1923                         }
1924                             
1925                         level.start = 0;
1926                         level.length = (uint64_t)-1;
1927                         matroska->levels[matroska->num_levels] = level;
1928                         matroska->num_levels++;
1929
1930                         /* check ID */
1931                         if (!(id = ebml_peek_id (matroska,
1932                                                  &matroska->level_up)))
1933                             break;
1934                         if (id != seek_id) {
1935                             av_log(matroska->ctx, AV_LOG_INFO,
1936                                    "We looked for ID=0x%x but got "
1937                                    "ID=0x%x (pos=%llu)",
1938                                    seek_id, id, seek_pos +
1939                                    matroska->segment_start);
1940                             goto finish;
1941                         }
1942
1943                         /* read master + parse */
1944                         if ((res = ebml_read_master(matroska, &id)) < 0)
1945                             break;
1946                         switch (id) {
1947                             case MATROSKA_ID_CUES:
1948                                 if (!(res = matroska_parse_index(matroska)) ||
1949                                     url_feof(&matroska->ctx->pb)) {
1950                                     matroska->index_parsed = 1;
1951                                     res = 0;
1952                                 }
1953                                 break;
1954                             case MATROSKA_ID_TAGS:
1955                                 if (!(res = matroska_parse_metadata(matroska)) ||
1956                                    url_feof(&matroska->ctx->pb)) {
1957                                     matroska->metadata_parsed = 1;
1958                                     res = 0;
1959                                 }
1960                                 break;
1961                         }
1962                         if (res < 0)
1963                             break;
1964
1965                     finish:
1966                         /* remove dummy level */
1967                         while (matroska->num_levels) {
1968                             matroska->num_levels--;
1969                             length =
1970                                 matroska->levels[matroska->num_levels].length;
1971                             if (length == (uint64_t)-1)
1972                                 break;
1973                         }
1974
1975                         /* seek back */
1976                         if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1977                             return res;
1978                         matroska->peek_id = peek_id_cache;
1979                         matroska->level_up = level_up;
1980                         break;
1981                     }
1982
1983                     default:
1984                         av_log(matroska->ctx, AV_LOG_INFO,
1985                                "Ignoring seekhead entry for ID=0x%x\n",
1986                                seek_id);
1987                         break;
1988                 }
1989
1990                 break;
1991             }
1992
1993             default:
1994                 av_log(matroska->ctx, AV_LOG_INFO,
1995                        "Unknown seekhead ID 0x%x\n", id);
1996                 /* fall-through */
1997
1998             case EBML_ID_VOID:
1999                 res = ebml_read_skip(matroska);
2000                 break;
2001         }
2002
2003         if (matroska->level_up) {
2004             matroska->level_up--;
2005             break;
2006         }
2007     }
2008
2009     return res;
2010 }
2011
2012 static int
2013 matroska_read_header (AVFormatContext    *s,
2014                       AVFormatParameters *ap)
2015 {
2016     MatroskaDemuxContext *matroska = s->priv_data;
2017     char *doctype;
2018     int version, last_level, res = 0;
2019     uint32_t id;
2020
2021     matroska->ctx = s;
2022
2023     /* First read the EBML header. */
2024     doctype = NULL;
2025     if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2026         return res;
2027     if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2028         av_log(matroska->ctx, AV_LOG_ERROR,
2029                "Wrong EBML doctype ('%s' != 'matroska').\n",
2030                doctype ? doctype : "(none)");
2031         if (doctype)
2032             av_free(doctype);
2033         return AVERROR_NOFMT;
2034     }
2035     av_free(doctype);
2036     if (version != 1) {
2037         av_log(matroska->ctx, AV_LOG_ERROR,
2038                "Matroska demuxer version 1 too old for file version %d\n",
2039                version);
2040         return AVERROR_NOFMT;
2041     }
2042
2043     /* The next thing is a segment. */
2044     while (1) {
2045         if (!(id = ebml_peek_id(matroska, &last_level)))
2046             return AVERROR_IO;
2047         if (id == MATROSKA_ID_SEGMENT)
2048             break;
2049
2050         /* oi! */
2051         av_log(matroska->ctx, AV_LOG_INFO,
2052                "Expected a Segment ID (0x%x), but received 0x%x!\n",
2053                MATROSKA_ID_SEGMENT, id);
2054         if ((res = ebml_read_skip(matroska)) < 0)
2055             return res;
2056     }
2057
2058     /* We now have a Matroska segment.
2059      * Seeks are from the beginning of the segment,
2060      * after the segment ID/length. */
2061     if ((res = ebml_read_master(matroska, &id)) < 0)
2062         return res;
2063     matroska->segment_start = url_ftell(&s->pb);
2064
2065     matroska->time_scale = 1000000;
2066     /* we've found our segment, start reading the different contents in here */
2067     while (res == 0) {
2068         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2069             res = AVERROR_IO;
2070             break;
2071         } else if (matroska->level_up) {
2072             matroska->level_up--;
2073             break;
2074         }
2075
2076         switch (id) {
2077             /* stream info */
2078             case MATROSKA_ID_INFO: {
2079                 if ((res = ebml_read_master(matroska, &id)) < 0)
2080                     break;
2081                 res = matroska_parse_info(matroska);
2082                 break;
2083             }
2084
2085             /* track info headers */
2086             case MATROSKA_ID_TRACKS: {
2087                 if ((res = ebml_read_master(matroska, &id)) < 0)
2088                     break;
2089                 res = matroska_parse_tracks(matroska);
2090                 break;
2091             }
2092
2093             /* stream index */
2094             case MATROSKA_ID_CUES: {
2095                 if (!matroska->index_parsed) {
2096                     if ((res = ebml_read_master(matroska, &id)) < 0)
2097                         break;
2098                     res = matroska_parse_index(matroska);
2099                 } else
2100                     res = ebml_read_skip(matroska);
2101                 break;
2102             }
2103
2104             /* metadata */
2105             case MATROSKA_ID_TAGS: {
2106                 if (!matroska->metadata_parsed) {
2107                     if ((res = ebml_read_master(matroska, &id)) < 0)
2108                         break;
2109                     res = matroska_parse_metadata(matroska);
2110                 } else
2111                     res = ebml_read_skip(matroska);
2112                 break;
2113             }
2114
2115             /* file index (if seekable, seek to Cues/Tags to parse it) */
2116             case MATROSKA_ID_SEEKHEAD: {
2117                 if ((res = ebml_read_master(matroska, &id)) < 0)
2118                     break;
2119                 res = matroska_parse_seekhead(matroska);
2120                 break;
2121             }
2122
2123             case MATROSKA_ID_CLUSTER: {
2124                 /* Do not read the master - this will be done in the next
2125                  * call to matroska_read_packet. */
2126                 res = 1;
2127                 break;
2128             }
2129
2130             default:
2131                 av_log(matroska->ctx, AV_LOG_INFO,
2132                        "Unknown matroska file header ID 0x%x\n", id);
2133             /* fall-through */
2134
2135             case EBML_ID_VOID:
2136                 res = ebml_read_skip(matroska);
2137                 break;
2138         }
2139
2140         if (matroska->level_up) {
2141             matroska->level_up--;
2142             break;
2143         }
2144     }
2145
2146     if (res < 0)
2147         return res;
2148
2149     /* Have we found a cluster? */
2150     if (res == 1) {
2151         int i;
2152         enum CodecID codec_id;
2153         MatroskaTrack *track;
2154         AVStream *st;
2155         void *extradata = NULL;
2156         int extradata_size = 0;
2157
2158         for (i = 0; i < matroska->num_tracks; i++) {
2159             track = matroska->tracks[i];
2160
2161             /* libavformat does not really support subtitles.
2162              * Also apply some sanity checks. */
2163             if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
2164                 (track->codec_id == NULL))
2165                 continue;
2166
2167             /* Set the FourCC from the CodecID. */
2168             /* This is the MS compatibility mode which stores a
2169              * BITMAPINFOHEADER in the CodecPrivate. */
2170             if (!strcmp(track->codec_id,
2171                         MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2172                 (track->codec_priv_size >= 40) &&
2173                 (track->codec_priv != NULL)) {
2174                 unsigned char *p;
2175
2176                 /* Offset of biCompression. Stored in LE. */
2177                 p = (unsigned char *)track->codec_priv + 16;
2178                 ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
2179                                  (p[2] << 16) | (p[1] << 8) | p[0];
2180                 codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
2181
2182             } else if (!strcmp(track->codec_id,
2183                                MATROSKA_CODEC_ID_VIDEO_MPEG4_SP) ||
2184                        !strcmp(track->codec_id,
2185                                MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP) ||
2186                        !strcmp(track->codec_id,
2187                                MATROSKA_CODEC_ID_VIDEO_MPEG4_AP))
2188                 codec_id = CODEC_ID_MPEG4;
2189             else if (!strcmp(track->codec_id,
2190                              MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC))
2191                 codec_id = CODEC_ID_H264;
2192 /*             else if (!strcmp(track->codec_id, */
2193 /*                              MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
2194 /*                 codec_id = CODEC_ID_???; */
2195             else if (!strcmp(track->codec_id,
2196                              MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3))
2197                 codec_id = CODEC_ID_MSMPEG4V3;
2198             else if (!strcmp(track->codec_id,
2199                              MATROSKA_CODEC_ID_VIDEO_MPEG1) ||
2200                      !strcmp(track->codec_id,
2201                              MATROSKA_CODEC_ID_VIDEO_MPEG2))
2202                 codec_id = CODEC_ID_MPEG2VIDEO;
2203
2204             /* This is the MS compatibility mode which stores a
2205              * WAVEFORMATEX in the CodecPrivate. */
2206             else if (!strcmp(track->codec_id, 
2207                              MATROSKA_CODEC_ID_AUDIO_ACM) &&
2208                 (track->codec_priv_size >= 18) &&
2209                 (track->codec_priv != NULL)) {
2210                 unsigned char *p;
2211                 uint16_t tag;
2212
2213                 /* Offset of wFormatTag. Stored in LE. */
2214                 p = (unsigned char *)track->codec_priv;
2215                 tag = (p[1] << 8) | p[0];
2216                 codec_id = codec_get_wav_id(tag);
2217
2218             } else if (!strcmp(track->codec_id,
2219                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L1) ||
2220                        !strcmp(track->codec_id,
2221                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L2) ||
2222                        !strcmp(track->codec_id,
2223                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L3))
2224                 codec_id = CODEC_ID_MP3;
2225             else if (!strcmp(track->codec_id,
2226                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE))
2227                 codec_id = CODEC_ID_PCM_U16BE;
2228             else if (!strcmp(track->codec_id,
2229                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE))
2230                 codec_id = CODEC_ID_PCM_U16LE;
2231 /*             else if (!strcmp(track->codec_id, */
2232 /*                              MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
2233 /*                 codec_id = CODEC_ID_PCM_???; */
2234             else if (!strcmp(track->codec_id,
2235                              MATROSKA_CODEC_ID_AUDIO_AC3))
2236                 codec_id = CODEC_ID_AC3;
2237             else if (!strcmp(track->codec_id,
2238                              MATROSKA_CODEC_ID_AUDIO_DTS))
2239                 codec_id = CODEC_ID_DTS;
2240             /* No such codec id so far. */
2241 /*             else if (!strcmp(track->codec_id, */
2242 /*                              MATROSKA_CODEC_ID_AUDIO_DTS)) */
2243 /*                 codec_id = CODEC_ID_DTS; */
2244             else if (!strcmp(track->codec_id,
2245                              MATROSKA_CODEC_ID_AUDIO_VORBIS)) {
2246                 unsigned char *p = track->codec_priv, *cdp;
2247                 int cps = track->codec_priv_size;
2248                 int nf, s[3], cds;
2249                 int i;
2250
2251                 nf = *p++;
2252                 cps--;
2253
2254                 if(nf != 2)
2255                     continue;
2256
2257                 for(i = 0; i < 2; i++){
2258                     int xv;
2259                     s[i] = 0;
2260                     do {
2261                         xv = *p++;
2262                         s[i] += xv;
2263                         cps--;
2264                     } while(xv == 255);
2265                 }
2266
2267                 s[2] = cps - s[0] - s[1];
2268
2269                 cds = cps + 6;
2270                 extradata = cdp = av_malloc(cds);
2271                 if(extradata == NULL)
2272                     return AVERROR_NOMEM;
2273                 extradata_size = cds;
2274
2275                 for(i = 0; i < 3; i++){
2276                     *cdp++ = s[i] >> 8;
2277                     *cdp++ = s[i] & 0xff;
2278                     memcpy(cdp, p, s[i]);
2279                     cdp += s[i];
2280                     p += s[i];
2281                     cps -= s[i];
2282                 }
2283
2284                 codec_id = CODEC_ID_VORBIS;
2285             } else if (!strcmp(track->codec_id,
2286                                MATROSKA_CODEC_ID_AUDIO_MPEG2) ||
2287                        !strcmp(track->codec_id,
2288                                MATROSKA_CODEC_ID_AUDIO_MPEG4))
2289                 codec_id = CODEC_ID_AAC;
2290             else
2291                 codec_id = CODEC_ID_NONE;
2292
2293             if (codec_id == CODEC_ID_NONE) {
2294                 av_log(matroska->ctx, AV_LOG_INFO,
2295                        "Unknown/unsupported CodecID %s.\n",
2296                        track->codec_id);
2297             }
2298
2299             track->stream_index = matroska->num_streams;
2300
2301             matroska->num_streams++;
2302             st = av_new_stream(s, track->stream_index);
2303             if (st == NULL)
2304                 return AVERROR_NOMEM;
2305             av_set_pts_info(st, 24, 1, 1000); /* 24 bit pts in ms */
2306
2307             st->codec.codec_id = codec_id;
2308
2309             if(extradata){
2310                 st->codec.extradata = extradata;
2311                 st->codec.extradata_size = extradata_size;
2312             } else if(track->codec_priv && track->codec_priv_size > 0){
2313                 st->codec.extradata = av_malloc(track->codec_priv_size);
2314                 if(st->codec.extradata == NULL)
2315                     return AVERROR_NOMEM;
2316                 st->codec.extradata_size = track->codec_priv_size;
2317                 memcpy(st->codec.extradata, track->codec_priv,
2318                        track->codec_priv_size);
2319             }
2320
2321             if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2322                 MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2323
2324                 st->codec.codec_type = CODEC_TYPE_VIDEO;
2325                 st->codec.codec_tag = videotrack->fourcc;
2326                 st->codec.width = videotrack->pixel_width;
2327                 st->codec.height = videotrack->pixel_height;
2328                 if (videotrack->display_width == 0)
2329                     st->codec.sample_aspect_ratio.num =
2330                         videotrack->pixel_width;
2331                 else
2332                     st->codec.sample_aspect_ratio.num =
2333                         videotrack->display_width;
2334                 if (videotrack->display_height == 0)
2335                     st->codec.sample_aspect_ratio.num =
2336                         videotrack->pixel_height;
2337                 else
2338                     st->codec.sample_aspect_ratio.num =
2339                         videotrack->display_height;
2340
2341             } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2342                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2343
2344                 st->codec.codec_type = CODEC_TYPE_AUDIO;
2345                 st->codec.sample_rate = audiotrack->samplerate;
2346                 st->codec.channels = audiotrack->channels;
2347             }
2348
2349             /* What do we do with private data? E.g. for Vorbis. */
2350         }
2351     }
2352
2353     return 0;
2354 }
2355
2356 static int
2357 matroska_find_track_by_num (MatroskaDemuxContext *matroska,
2358                             int                   num)
2359 {
2360     int i;
2361
2362     for (i = 0; i < matroska->num_tracks; i++)
2363         if (matroska->tracks[i]->num == num)
2364             return i;
2365
2366     return -1;
2367 }
2368
2369 static int
2370 matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2371                            uint64_t              cluster_time)
2372 {
2373     int res = 0;
2374     uint32_t id;
2375     AVPacket *pkt;
2376     int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2377
2378     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2379
2380     while (res == 0) {
2381         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2382             res = AVERROR_IO;
2383             break;
2384         } else if (matroska->level_up) {
2385             matroska->level_up--;
2386             break;
2387         }
2388
2389         switch (id) {
2390             /* one block inside the group. Note, block parsing is one
2391              * of the harder things, so this code is a bit complicated.
2392              * See http://www.matroska.org/ for documentation. */
2393             case MATROSKA_ID_BLOCK: {
2394                 uint8_t *data, *origdata;
2395                 int size;
2396                 uint64_t time;
2397                 uint32_t *lace_size = NULL;
2398                 int n, track, flags, laces = 0;
2399                 uint64_t num;
2400
2401                 if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
2402                     break;
2403                 origdata = data;
2404
2405                 /* first byte(s): blocknum */
2406                 if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2407                     av_log(matroska->ctx, AV_LOG_ERROR,
2408                            "EBML block data error\n");
2409                     av_free(origdata);
2410                     break;
2411                 }
2412                 data += n;
2413                 size -= n;
2414
2415                 /* fetch track from num */
2416                 track = matroska_find_track_by_num(matroska, num);
2417                 if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2418                     av_log(matroska->ctx, AV_LOG_INFO,
2419                            "Invalid stream %d or size %u\n", track, size);
2420                     av_free(origdata);
2421                     break;
2422                 }
2423                 if(matroska->ctx->streams[ matroska->tracks[track]->stream_index ]->discard){
2424                     av_free(origdata);
2425                     break;                
2426                 }
2427
2428                 /* time (relative to cluster time) */
2429                 time = ((data[0] << 8) | data[1]) * matroska->time_scale;
2430                 data += 2;
2431                 size -= 2;
2432                 flags = *data;
2433                 data += 1;
2434                 size -= 1;
2435                 switch ((flags & 0x06) >> 1) {
2436                     case 0x0: /* no lacing */
2437                         laces = 1;
2438                         lace_size = av_mallocz(sizeof(int));
2439                         lace_size[0] = size;
2440                         break;
2441
2442                     case 0x1: /* xiph lacing */
2443                     case 0x2: /* fixed-size lacing */
2444                     case 0x3: /* EBML lacing */
2445                         if (size == 0) {
2446                             res = -1;
2447                             break;
2448                         }
2449                         laces = (*data) + 1;
2450                         data += 1;
2451                         size -= 1;
2452                         lace_size = av_mallocz(laces * sizeof(int));
2453
2454                         switch ((flags & 0x06) >> 1) {
2455                             case 0x1: /* xiph lacing */ {
2456                                 uint8_t temp;
2457                                 uint32_t total = 0;
2458                                 for (n = 0; res == 0 && n < laces - 1; n++) {
2459                                     while (1) {
2460                                         if (size == 0) {
2461                                             res = -1;
2462                                             break;
2463                                         }
2464                                         temp = *data;
2465                                         lace_size[n] += temp;
2466                                         data += 1;
2467                                         size -= 1;
2468                                         if (temp != 0xff)
2469                                             break;
2470                                     }
2471                                     total += lace_size[n];
2472                                 }
2473                                 lace_size[n] = size - total;
2474                                 break;
2475                             }
2476
2477                             case 0x2: /* fixed-size lacing */
2478                                 for (n = 0; n < laces; n++)
2479                                     lace_size[n] = size / laces;
2480                                 break;
2481
2482                             case 0x3: /* EBML lacing */ {
2483                                 uint32_t total;
2484                                 n = matroska_ebmlnum_uint(data, size, &num);
2485                                 if (n < 0) {
2486                                     av_log(matroska->ctx, AV_LOG_INFO,
2487                                            "EBML block data error\n");
2488                                     break;
2489                                 }
2490                                 data += n;
2491                                 size -= n;
2492                                 total = lace_size[0] = num;
2493                                 for (n = 1; res == 0 && n < laces - 1; n++) {
2494                                     int64_t snum;
2495                                     int r;
2496                                     r = matroska_ebmlnum_sint (data, size,
2497                                                                &snum);
2498                                     if (r < 0) {
2499                                         av_log(matroska->ctx, AV_LOG_INFO,
2500                                                "EBML block data error\n");
2501                                         break;
2502                                     }
2503                                     data += r;
2504                                     size -= r;
2505                                     lace_size[n] = lace_size[n - 1] + snum;
2506                                     total += lace_size[n];
2507                                 }
2508                                 lace_size[n] = size - total;
2509                                 break;
2510                             }
2511                         }
2512                         break;
2513                 }
2514
2515                 if (res == 0) {
2516                     for (n = 0; n < laces; n++) {
2517                         uint64_t timecode = 0;
2518
2519                         pkt = av_mallocz(sizeof(AVPacket));
2520                         /* XXX: prevent data copy... */
2521                         if (av_new_packet(pkt,lace_size[n]) < 0) {
2522                             res = AVERROR_NOMEM;
2523                             break;
2524                         }
2525                         if (cluster_time != (uint64_t)-1) {
2526                             if (time < 0 && (-time) > cluster_time)
2527                                 timecode = cluster_time;
2528                             else
2529                                 timecode = cluster_time + time;
2530                         }
2531                         /* FIXME: duration */
2532
2533                         memcpy(pkt->data, data, lace_size[n]);
2534                         data += lace_size[n];
2535                         if (n == 0)
2536                             pkt->flags = is_keyframe;
2537                         pkt->stream_index =
2538                             matroska->tracks[track]->stream_index;
2539
2540                         pkt->pts = timecode / 1000000; /* ns to ms */
2541
2542                         matroska_queue_packet(matroska, pkt);
2543                     }
2544                 }
2545
2546                 av_free(lace_size);
2547                 av_free(origdata);
2548                 break;
2549             }
2550
2551             case MATROSKA_ID_BLOCKDURATION: {
2552                 uint64_t num;
2553                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2554                     break;
2555                 av_log(matroska->ctx, AV_LOG_INFO,
2556                        "FIXME: implement support for BlockDuration\n");
2557                 break;
2558             }
2559
2560             case MATROSKA_ID_BLOCKREFERENCE:
2561                 /* We've found a reference, so not even the first frame in
2562                  * the lace is a key frame. */
2563                 is_keyframe = 0;
2564                 if (last_num_packets != matroska->num_packets)
2565                     matroska->packets[last_num_packets]->flags = 0;
2566                 res = ebml_read_skip(matroska);
2567                 break;
2568
2569             default:
2570                 av_log(matroska->ctx, AV_LOG_INFO,
2571                        "Unknown entry 0x%x in blockgroup data\n", id);
2572                 /* fall-through */
2573
2574             case EBML_ID_VOID:
2575                 res = ebml_read_skip(matroska);
2576                 break;
2577         }
2578
2579         if (matroska->level_up) {
2580             matroska->level_up--;
2581             break;
2582         }
2583     }
2584
2585     return res;
2586 }
2587
2588 static int
2589 matroska_parse_cluster (MatroskaDemuxContext *matroska)
2590 {
2591     int res = 0;
2592     uint32_t id;
2593     uint64_t cluster_time = 0;
2594
2595     av_log(matroska->ctx, AV_LOG_DEBUG,
2596            "parsing cluster at %lld\n", url_ftell(&matroska->ctx->pb));
2597
2598     while (res == 0) {
2599         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2600             res = AVERROR_IO;
2601             break;
2602         } else if (matroska->level_up) {
2603             matroska->level_up--;
2604             break;
2605         }
2606
2607         switch (id) {
2608             /* cluster timecode */
2609             case MATROSKA_ID_CLUSTERTIMECODE: {
2610                 uint64_t num;
2611                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2612                     break;
2613                 cluster_time = num * matroska->time_scale;
2614                 break;
2615             }
2616
2617                 /* a group of blocks inside a cluster */
2618             case MATROSKA_ID_BLOCKGROUP:
2619                 if ((res = ebml_read_master(matroska, &id)) < 0)
2620                     break;
2621                 res = matroska_parse_blockgroup(matroska, cluster_time);
2622                 break;
2623
2624             default:
2625                 av_log(matroska->ctx, AV_LOG_INFO,
2626                        "Unknown entry 0x%x in cluster data\n", id);
2627                 /* fall-through */
2628
2629             case EBML_ID_VOID:
2630                 res = ebml_read_skip(matroska);
2631                 break;
2632         }
2633
2634         if (matroska->level_up) {
2635             matroska->level_up--;
2636             break;
2637         }
2638     }
2639
2640     return res;
2641 }
2642
2643 static int
2644 matroska_read_packet (AVFormatContext *s,
2645                       AVPacket        *pkt)
2646 {
2647     MatroskaDemuxContext *matroska = s->priv_data;
2648     int res = 0;
2649     uint32_t id;
2650
2651     /* Do we still have a packet queued? */
2652     if (matroska_deliver_packet(matroska, pkt) == 0)
2653         return 0;
2654
2655     /* Have we already reached the end? */
2656     if (matroska->done)
2657         return AVERROR_IO;
2658
2659     while (res == 0) {
2660         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2661             res = AVERROR_IO;
2662             break;
2663         } else if (matroska->level_up) {
2664             matroska->level_up--;
2665             break;
2666         }
2667
2668         switch (id) {
2669             case MATROSKA_ID_CLUSTER:
2670                 if ((res = ebml_read_master(matroska, &id)) < 0)
2671                     break;
2672                 if ((res = matroska_parse_cluster(matroska)) == 0)
2673                     res = 1; /* Parsed one cluster, let's get out. */
2674                 break;
2675
2676             default:
2677             case EBML_ID_VOID:
2678                 res = ebml_read_skip(matroska);
2679                 break;
2680         }
2681
2682         if (matroska->level_up) {
2683             matroska->level_up--;
2684             break;
2685         }
2686     }
2687
2688     if (res == -1)
2689         matroska->done = 1;
2690
2691     return matroska_deliver_packet(matroska, pkt);
2692 }
2693
2694 static int
2695 matroska_read_close (AVFormatContext *s)
2696 {
2697     MatroskaDemuxContext *matroska = s->priv_data;
2698     int n = 0;
2699
2700     if (matroska->writing_app)
2701         av_free(matroska->writing_app);
2702     if (matroska->muxing_app)
2703         av_free(matroska->muxing_app);
2704     if (matroska->index)
2705         av_free(matroska->index);
2706
2707     if (matroska->packets != NULL) {
2708         for (n = 0; n < matroska->num_packets; n++) {
2709             av_free_packet(matroska->packets[n]);
2710             av_free(matroska->packets[n]);
2711         }
2712         av_free(matroska->packets);
2713     }
2714
2715     for (n = 0; n < matroska->num_tracks; n++) {
2716         MatroskaTrack *track = matroska->tracks[n];
2717         if (track->codec_id)
2718             av_free(track->codec_id);
2719         if (track->codec_name)
2720             av_free(track->codec_name);
2721         if (track->codec_priv)
2722             av_free(track->codec_priv);
2723         if (track->name)
2724             av_free(track->name);
2725         if (track->language)
2726             av_free(track->language);
2727
2728         av_free(track);
2729     }
2730
2731     for (n = 0; n < s->nb_streams; n++) {
2732         av_free(s->streams[n]->codec.extradata);
2733     }
2734
2735     memset(matroska, 0, sizeof(MatroskaDemuxContext));
2736
2737     return 0;
2738 }
2739
2740 static AVInputFormat matroska_iformat = {
2741     "matroska",
2742     "Matroska file format",
2743     sizeof(MatroskaDemuxContext),
2744     matroska_probe,
2745     matroska_read_header,
2746     matroska_read_packet,
2747     matroska_read_close,
2748 };
2749
2750 int
2751 matroska_init(void)
2752 {
2753     av_register_input_format(&matroska_iformat);
2754     return 0;
2755 }