]> git.sesse.net Git - ffmpeg/blob - libavformat/matroska.c
unknown format char
[ffmpeg] / libavformat / matroska.c
1 /*
2  * Matroska file demuxer (no muxer yet)
3  * Copyright (c) 2003-2004 The ffmpeg Project
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 /**
21  * @file matroska.c
22  * Matroska file demuxer
23  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
24  * with a little help from Moritz Bunkus <moritz@bunkus.org>
25  * Specs available on the matroska project page:
26  * http://www.matroska.org/.
27  */
28
29 #include "avformat.h"
30 /* For codec_get_bmp_id and codec_get_wav_id. */
31 #include "avi.h"
32
33 /* EBML version supported */
34 #define EBML_VERSION 1
35
36 /* top-level master-IDs */
37 #define EBML_ID_HEADER             0x1A45DFA3
38
39 /* IDs in the HEADER master */
40 #define EBML_ID_EBMLVERSION        0x4286
41 #define EBML_ID_EBMLREADVERSION    0x42F7
42 #define EBML_ID_EBMLMAXIDLENGTH    0x42F2
43 #define EBML_ID_EBMLMAXSIZELENGTH  0x42F3
44 #define EBML_ID_DOCTYPE            0x4282
45 #define EBML_ID_DOCTYPEVERSION     0x4287
46 #define EBML_ID_DOCTYPEREADVERSION 0x4285
47
48 /* general EBML types */
49 #define EBML_ID_VOID               0xEC
50
51 /*
52  * Matroska element IDs. max. 32-bit.
53  */
54
55 /* toplevel segment */
56 #define MATROSKA_ID_SEGMENT    0x18538067
57
58 /* matroska top-level master IDs */
59 #define MATROSKA_ID_INFO       0x1549A966
60 #define MATROSKA_ID_TRACKS     0x1654AE6B
61 #define MATROSKA_ID_CUES       0x1C53BB6B
62 #define MATROSKA_ID_TAGS       0x1254C367
63 #define MATROSKA_ID_SEEKHEAD   0x114D9B74
64 #define MATROSKA_ID_CLUSTER    0x1F43B675
65
66 /* IDs in the info master */
67 #define MATROSKA_ID_TIMECODESCALE 0x2AD7B1
68 #define MATROSKA_ID_DURATION   0x4489
69 #define MATROSKA_ID_WRITINGAPP 0x5741
70 #define MATROSKA_ID_MUXINGAPP  0x4D80
71 #define MATROSKA_ID_DATEUTC    0x4461
72
73 /* ID in the tracks master */
74 #define MATROSKA_ID_TRACKENTRY 0xAE
75
76 /* IDs in the trackentry master */
77 #define MATROSKA_ID_TRACKNUMBER 0xD7
78 #define MATROSKA_ID_TRACKUID   0x73C5
79 #define MATROSKA_ID_TRACKTYPE  0x83
80 #define MATROSKA_ID_TRACKAUDIO 0xE1
81 #define MATROSKA_ID_TRACKVIDEO 0xE0
82 #define MATROSKA_ID_CODECID    0x86
83 #define MATROSKA_ID_CODECPRIVATE 0x63A2
84 #define MATROSKA_ID_CODECNAME  0x258688
85 #define MATROSKA_ID_CODECINFOURL 0x3B4040
86 #define MATROSKA_ID_CODECDOWNLOADURL 0x26B240
87 #define MATROSKA_ID_TRACKNAME  0x536E
88 #define MATROSKA_ID_TRACKLANGUAGE 0x22B59C
89 #define MATROSKA_ID_TRACKFLAGENABLED 0xB9
90 #define MATROSKA_ID_TRACKFLAGDEFAULT 0x88
91 #define MATROSKA_ID_TRACKFLAGLACING 0x9C
92 #define MATROSKA_ID_TRACKMINCACHE 0x6DE7
93 #define MATROSKA_ID_TRACKMAXCACHE 0x6DF8
94 #define MATROSKA_ID_TRACKDEFAULTDURATION 0x23E383
95
96 /* IDs in the trackvideo master */
97 #define MATROSKA_ID_VIDEOFRAMERATE 0x2383E3
98 #define MATROSKA_ID_VIDEODISPLAYWIDTH 0x54B0
99 #define MATROSKA_ID_VIDEODISPLAYHEIGHT 0x54BA
100 #define MATROSKA_ID_VIDEOPIXELWIDTH 0xB0
101 #define MATROSKA_ID_VIDEOPIXELHEIGHT 0xBA
102 #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A
103 #define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9
104 #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3
105 #define MATROSKA_ID_VIDEOCOLOURSPACE 0x2EB524
106
107 /* IDs in the trackaudio master */
108 #define MATROSKA_ID_AUDIOSAMPLINGFREQ 0xB5
109 #define MATROSKA_ID_AUDIOBITDEPTH 0x6264
110 #define MATROSKA_ID_AUDIOCHANNELS 0x9F
111
112 /* ID in the cues master */
113 #define MATROSKA_ID_POINTENTRY 0xBB
114
115 /* IDs in the pointentry master */
116 #define MATROSKA_ID_CUETIME    0xB3
117 #define MATROSKA_ID_CUETRACKPOSITION 0xB7
118
119 /* IDs in the cuetrackposition master */
120 #define MATROSKA_ID_CUETRACK   0xF7
121 #define MATROSKA_ID_CUECLUSTERPOSITION 0xF1
122
123 /* IDs in the tags master */
124 /* TODO */
125
126 /* IDs in the seekhead master */
127 #define MATROSKA_ID_SEEKENTRY  0x4DBB
128
129 /* IDs in the seekpoint master */
130 #define MATROSKA_ID_SEEKID     0x53AB
131 #define MATROSKA_ID_SEEKPOSITION 0x53AC
132
133 /* IDs in the cluster master */
134 #define MATROSKA_ID_CLUSTERTIMECODE 0xE7
135 #define MATROSKA_ID_BLOCKGROUP 0xA0
136
137 /* IDs in the blockgroup master */
138 #define MATROSKA_ID_BLOCK      0xA1
139 #define MATROSKA_ID_BLOCKDURATION 0x9B
140 #define MATROSKA_ID_BLOCKREFERENCE 0xFB
141
/* Track type codes, as compared against a track's 'type' field. */
typedef enum {
    MATROSKA_TRACK_TYPE_VIDEO    = 0x01,
    MATROSKA_TRACK_TYPE_AUDIO    = 0x02,
    MATROSKA_TRACK_TYPE_COMPLEX  = 0x03,
    MATROSKA_TRACK_TYPE_LOGO     = 0x10,
    MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
    MATROSKA_TRACK_TYPE_CONTROL  = 0x20
} MatroskaTrackType;
150
/* Eye (stereo) mode codes stored in a video track's 'eye_mode' field. */
typedef enum {
    MATROSKA_EYE_MODE_MONO  = 0,
    MATROSKA_EYE_MODE_RIGHT = 1,
    MATROSKA_EYE_MODE_LEFT  = 2,
    MATROSKA_EYE_MODE_BOTH  = 3
} MatroskaEyeMode;
157
/* Aspect-ratio mode codes stored in a video track's 'ar_mode' field. */
typedef enum {
    MATROSKA_ASPECT_RATIO_MODE_FREE  = 0,
    MATROSKA_ASPECT_RATIO_MODE_KEEP  = 1,
    MATROSKA_ASPECT_RATIO_MODE_FIXED = 2
} MatroskaAspectRatioMode;
163
/*
 * The flag sets below are not part of the Matroska format itself;
 * they are internal bookkeeping bits used by the muxer/demuxer.
 */

/* Flags common to every track. MATROSKA_TRACK_SHIFT is the first bit
 * available to the type-specific flag sets that follow. */
typedef enum {
    MATROSKA_TRACK_ENABLED = 0x00001,
    MATROSKA_TRACK_DEFAULT = 0x00002,
    MATROSKA_TRACK_LACING  = 0x00004,
    MATROSKA_TRACK_SHIFT   = 0x10000
} MatroskaTrackFlags;

/* Video-only flags, numbered upwards from MATROSKA_TRACK_SHIFT. */
typedef enum {
    MATROSKA_VIDEOTRACK_INTERLACED = (MATROSKA_TRACK_SHIFT << 0)
} MatroskaVideoTrackFlags;
179
180 /*
181  * Matroska Codec IDs. Strings.
182  */
183
184 #define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC   "V_MS/VFW/FOURCC"
185 #define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
186 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP     "V_MPEG4/ISO/SP"
187 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP    "V_MPEG4/ISO/ASP"
188 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP     "V_MPEG4/ISO/AP"
189 #define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3    "V_MPEG4/MS/V3"
190 #define MATROSKA_CODEC_ID_VIDEO_MPEG1        "V_MPEG1"
191 #define MATROSKA_CODEC_ID_VIDEO_MPEG2        "V_MPEG2"
192 #define MATROSKA_CODEC_ID_VIDEO_MJPEG        "V_MJPEG"
193 /* TODO: Real/Quicktime */
194
/* Audio codec ID strings. The two AAC entries end in '/' and are
 * therefore prefixes rather than complete IDs. */
#define MATROSKA_CODEC_ID_AUDIO_ACM          "A_MS/ACM"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1     "A_MPEG/L1"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2     "A_MPEG/L2"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3     "A_MPEG/L3"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE   "A_PCM/INT/BIG"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE   "A_PCM/INT/LIT"
#define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT    "A_PCM/FLOAT/IEEE"
#define MATROSKA_CODEC_ID_AUDIO_AC3          "A_AC3"
#define MATROSKA_CODEC_ID_AUDIO_DTS          "A_DTS"
#define MATROSKA_CODEC_ID_AUDIO_VORBIS       "A_VORBIS"
#define MATROSKA_CODEC_ID_AUDIO_MPEG2        "A_AAC/MPEG2/"
#define MATROSKA_CODEC_ID_AUDIO_MPEG4        "A_AAC/MPEG4/"
208 /* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */
209
210 /* max. depth in the EBML tree structure */
211 #define EBML_MAX_DEPTH 16
212
213 typedef struct Track {
214     MatroskaTrackType type;
215
216     /* Unique track number and track ID. stream_index is the index that
217      * the calling app uses for this track. */
218     uint32_t num,
219         uid,
220         stream_index;
221
222     char *name,
223         *language;
224
225     char *codec_id,
226         *codec_name;
227
228     unsigned char *codec_priv;
229     int codec_priv_size;
230
231     int64_t default_duration;
232     MatroskaTrackFlags flags;
233 } MatroskaTrack;
234
235 typedef struct MatroskaVideoTrack {
236     MatroskaTrack track;
237
238     int pixel_width,
239         pixel_height,
240         display_width,
241         display_height;
242
243     uint32_t fourcc;
244
245     MatroskaAspectRatioMode ar_mode;
246     MatroskaEyeMode eye_mode;
247
248     //..
249 } MatroskaVideoTrack;
250
251 typedef struct MatroskaAudioTrack {
252     MatroskaTrack track;
253
254     int channels,
255         bitdepth,
256         samplerate;
257     //..
258 } MatroskaAudioTrack;
259
260 typedef struct MatroskaSubtitleTrack {
261     MatroskaTrack track;
262
263     //..
264 } MatroskaSubtitleTrack;
265
/* One open master element on the EBML level stack. */
typedef struct MatroskaLevel {
    uint64_t start;  /* stream position where the element's content begins */
    uint64_t length; /* content length in bytes */
} MatroskaLevel;
269
/* One entry of the seeking index. */
typedef struct MatroskaDemuxIndex {
    uint64_t pos;   /* position of the corresponding *cluster*! */
    uint16_t track; /* refers to a track's 'num' */
    uint64_t time;  /* in nanoseconds */
} MatroskaDemuxIndex;
275
276 typedef struct MatroskaDemuxContext {
277     AVFormatContext *ctx;
278
279     /* ebml stuff */
280     int num_levels;
281     MatroskaLevel levels[EBML_MAX_DEPTH];
282     int level_up;
283
284     /* matroska stuff */
285     char *writing_app,
286         *muxing_app;
287     int64_t created;
288
289     /* timescale in the file */
290     int64_t time_scale;
291
292     /* length, position (time, ns) */
293     int64_t duration,
294         pos;
295
296     /* num_streams is the number of streams that av_new_stream() was called
297      * for ( = that are available to the calling program). */
298     int num_tracks, num_streams;
299     MatroskaTrack *tracks[MAX_STREAMS];
300
301     /* cache for ID peeking */
302     uint32_t peek_id;
303
304     /* byte position of the segment inside the stream */
305     offset_t segment_start;
306
307     /* The packet queue. */
308     AVPacket **packets;
309     int num_packets;
310
311     /* have we already parse metadata/cues/clusters? */
312     int metadata_parsed,
313         index_parsed,
314         done;
315
316     /* The index for seeking. */
317     int num_indexes;
318     MatroskaDemuxIndex *index;
319 } MatroskaDemuxContext;
320
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. Matroska is really just an
 * EBML file.
 */
326
327 /*
328  * Return: the amount of levels in the hierarchy that the
329  * current element lies higher than the previous one.
330  * The opposite isn't done - that's auto-done using master
331  * element reading.
332  */
333
334 static int
335 ebml_read_element_level_up (MatroskaDemuxContext *matroska)
336 {
337     ByteIOContext *pb = &matroska->ctx->pb;
338     offset_t pos = url_ftell(pb);
339     int num = 0;
340
341     while (matroska->num_levels > 0) {
342         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
343
344         if (pos >= level->start + level->length) {
345             matroska->num_levels--;
346             num++;
347         } else {
348             break;
349         }
350     }
351
352     return num;
353 }
354
355 /*
356  * Read: an "EBML number", which is defined as a variable-length
357  * array of bytes. The first byte indicates the length by giving a
358  * number of 0-bits followed by a one. The position of the first
359  * "one" bit inside the first byte indicates the length of this
360  * number.
361  * Returns: num. of bytes read. < 0 on error.
362  */
363
364 static int
365 ebml_read_num (MatroskaDemuxContext *matroska,
366                int                   max_size,
367                uint64_t             *number)
368 {
369     ByteIOContext *pb = &matroska->ctx->pb;
370     int len_mask = 0x80, read = 1, n = 1;
371     int64_t total = 0;
372
373     /* the first byte tells us the length in bytes - get_byte() can normally
374      * return 0, but since that's not a valid first ebmlID byte, we can
375      * use it safely here to catch EOS. */
376     if (!(total = get_byte(pb))) {
377         /* we might encounter EOS here */
378         if (!url_feof(pb)) {
379             offset_t pos = url_ftell(pb);
380             av_log(matroska->ctx, AV_LOG_ERROR,
381                    "Read error at pos. %llu (0x%llx)\n",
382                    pos, pos);
383         }
384         return AVERROR_IO; /* EOS or actual I/O error */
385     }
386
387     /* get the length of the EBML number */
388     while (read <= max_size && !(total & len_mask)) {
389         read++;
390         len_mask >>= 1;
391     }
392     if (read > max_size) {
393         offset_t pos = url_ftell(pb) - 1;
394         av_log(matroska->ctx, AV_LOG_ERROR,
395                "Invalid EBML number size tag 0x%02x at pos %llu (0x%llx)\n",
396                (uint8_t) total, pos, pos);
397         return AVERROR_INVALIDDATA;
398     }
399
400     /* read out length */
401     total &= ~len_mask;
402     while (n++ < read)
403         total = (total << 8) | get_byte(pb);
404
405     *number = total;
406
407     return read;
408 }
409
410 /*
411  * Read: the element content data ID.
412  * Return: the number of bytes read or < 0 on error.
413  */
414
415 static int
416 ebml_read_element_id (MatroskaDemuxContext *matroska,
417                       uint32_t             *id,
418                       int                  *level_up)
419 {
420     int read;
421     uint64_t total;
422
423     /* if we re-call this, use our cached ID */
424     if (matroska->peek_id != 0) {
425         if (level_up)
426             *level_up = 0;
427         *id = matroska->peek_id;
428         return 0;
429     }
430
431     /* read out the "EBML number", include tag in ID */
432     if ((read = ebml_read_num(matroska, 4, &total)) < 0)
433         return read;
434     *id = matroska->peek_id  = total | (1 << (read * 7));
435
436     /* level tracking */
437     if (level_up)
438         *level_up = ebml_read_element_level_up(matroska);
439
440     return read;
441 }
442
443 /*
444  * Read: element content length.
445  * Return: the number of bytes read or < 0 on error.
446  */
447
448 static int
449 ebml_read_element_length (MatroskaDemuxContext *matroska,
450                           uint64_t             *length)
451 {
452     /* clear cache since we're now beyond that data point */
453     matroska->peek_id = 0;
454
455     /* read out the "EBML number", include tag in ID */
456     return ebml_read_num(matroska, 8, length);
457 }
458
459 /*
460  * Return: the ID of the next element, or 0 on error.
461  * Level_up contains the amount of levels that this
462  * next element lies higher than the previous one.
463  */
464
465 static uint32_t
466 ebml_peek_id (MatroskaDemuxContext *matroska,
467               int                  *level_up)
468 {
469     uint32_t id;
470
471     assert(level_up != NULL);
472
473     if (ebml_read_element_id(matroska, &id, level_up) < 0)
474         return 0;
475
476     return id;
477 }
478
479 /*
480  * Seek to a given offset.
481  * 0 is success, -1 is failure.
482  */
483
484 static int
485 ebml_read_seek (MatroskaDemuxContext *matroska,
486                 offset_t              offset)
487 {
488     ByteIOContext *pb = &matroska->ctx->pb;
489
490     /* clear ID cache, if any */
491     matroska->peek_id = 0;
492
493     return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
494 }
495
496 /*
497  * Skip the next element.
498  * 0 is success, -1 is failure.
499  */
500
501 static int
502 ebml_read_skip (MatroskaDemuxContext *matroska)
503 {
504     ByteIOContext *pb = &matroska->ctx->pb;
505     uint32_t id;
506     uint64_t length;
507     int res;
508
509     if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
510         (res = ebml_read_element_length(matroska, &length)) < 0)
511         return res;
512
513     url_fskip(pb, length);
514
515     return 0;
516 }
517
518 /*
519  * Read the next element as an unsigned int.
520  * 0 is success, < 0 is failure.
521  */
522
523 static int
524 ebml_read_uint (MatroskaDemuxContext *matroska,
525                 uint32_t             *id,
526                 uint64_t             *num)
527 {
528     ByteIOContext *pb = &matroska->ctx->pb;
529     int n = 0, size, res;
530     uint64_t rlength;
531
532     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
533         (res = ebml_read_element_length(matroska, &rlength)) < 0)
534         return res;
535     size = rlength;
536     if (size < 1 || size > 8) {
537         offset_t pos = url_ftell(pb);
538         av_log(matroska->ctx, AV_LOG_ERROR,
539                "Invalid uint element size %d at position %lld (0x%llx)\n",
540                 size, pos, pos);
541         return AVERROR_INVALIDDATA;
542     }
543
544     /* big-endian ordening; build up number */
545     *num = 0;
546     while (n++ < size)
547         *num = (*num << 8) | get_byte(pb);
548
549     return 0;
550 }
551
552 /*
553  * Read the next element as a signed int.
554  * 0 is success, < 0 is failure.
555  */
556
557 static int
558 ebml_read_sint (MatroskaDemuxContext *matroska,
559                 uint32_t             *id,
560                 int64_t              *num)
561 {
562     ByteIOContext *pb = &matroska->ctx->pb;
563     int size, n = 1, negative = 0, res;
564     uint64_t rlength;
565
566     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
567         (res = ebml_read_element_length(matroska, &rlength)) < 0)
568         return res;
569     size = rlength;
570     if (size < 1 || size > 8) {
571         offset_t pos = url_ftell(pb);
572         av_log(matroska->ctx, AV_LOG_ERROR,
573                "Invalid sint element size %d at position %lld (0x%llx)\n",
574                 size, pos, pos);
575         return AVERROR_INVALIDDATA;
576     }
577     if ((*num = get_byte(pb)) & 0x80) {
578         negative = 1;
579         *num &= ~0x80;
580     }
581     *num = 0;
582     while (n++ < size)
583         *num = (*num << 8) | get_byte(pb);
584
585     /* make signed */
586     if (negative)
587         *num = *num - (1LL << ((8 * size) - 1));
588
589     return 0;
590 }
591
592 /*
593  * Read the next element as a float.
594  * 0 is success, < 0 is failure.
595  */
596
597 static int
598 ebml_read_float (MatroskaDemuxContext *matroska,
599                  uint32_t             *id,
600                  double               *num)
601 {
602     ByteIOContext *pb = &matroska->ctx->pb;
603     int size, res;
604     uint64_t rlength;
605
606     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
607         (res = ebml_read_element_length(matroska, &rlength)) < 0)
608         return res;
609     size = rlength;
610
611     if (size != 4 && size != 8 && size != 10) {
612         offset_t pos = url_ftell(pb);
613         av_log(matroska->ctx, AV_LOG_ERROR,
614                "Invalid float element size %d at position %llu (0x%llx)\n",
615                size, pos, pos);
616         return AVERROR_INVALIDDATA;
617     }
618     if (size == 10) {
619         av_log(matroska->ctx, AV_LOG_ERROR,
620                "FIXME! 10-byte floats unimplemented\n");
621         return AVERROR_UNKNOWN;
622     }
623
624     if (size == 4) {
625         float f;
626
627         while (size-- > 0)
628 #ifdef WORDS_BIGENDIAN
629             ((uint8_t *) &f)[3 - size] = get_byte(pb);
630 #else
631             ((uint8_t *) &f)[size] = get_byte(pb);
632 #endif
633
634         *num = f;
635     } else {
636         double d;
637
638         while (size-- > 0)
639 #ifdef WORDS_BIGENDIAN
640             ((uint8_t *) &d)[7 - size] = get_byte(pb);
641 #else
642             ((uint8_t *) &d)[size] = get_byte(pb);
643 #endif
644
645         *num = d;
646     }
647
648     return 0;
649 }
650
651 /*
652  * Read the next element as an ASCII string.
653  * 0 is success, < 0 is failure.
654  */
655
656 static int
657 ebml_read_ascii (MatroskaDemuxContext *matroska,
658                  uint32_t             *id,
659                  char                **str)
660 {
661     ByteIOContext *pb = &matroska->ctx->pb;
662     int size, res;
663     uint64_t rlength;
664
665     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
666         (res = ebml_read_element_length(matroska, &rlength)) < 0)
667         return res;
668     size = rlength;
669
670     /* ebml strings are usually not 0-terminated, so we allocate one
671      * byte more, read the string and NULL-terminate it ourselves. */
672     if (!(*str = av_malloc(size + 1))) {
673         av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
674         return AVERROR_NOMEM;
675     }
676     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
677         offset_t pos = url_ftell(pb);
678         av_log(matroska->ctx, AV_LOG_ERROR,
679                "Read error at pos. %llu (0x%llx)\n", pos, pos);
680         return AVERROR_IO;
681     }
682     (*str)[size] = '\0';
683
684     return 0;
685 }
686
687 /*
688  * Read the next element as a UTF-8 string.
689  * 0 is success, < 0 is failure.
690  */
691
692 static int
693 ebml_read_utf8 (MatroskaDemuxContext *matroska,
694                 uint32_t             *id,
695                 char                **str)
696 {
697   return ebml_read_ascii(matroska, id, str);
698 }
699
700 /*
701  * Read the next element as a date (nanoseconds since 1/1/2000).
702  * 0 is success, < 0 is failure.
703  */
704
705 static int
706 ebml_read_date (MatroskaDemuxContext *matroska,
707                 uint32_t             *id,
708                 int64_t              *date)
709 {
710   return ebml_read_sint(matroska, id, date);
711 }
712
713 /*
714  * Read the next element, but only the header. The contents
715  * are supposed to be sub-elements which can be read separately.
716  * 0 is success, < 0 is failure.
717  */
718
719 static int
720 ebml_read_master (MatroskaDemuxContext *matroska,
721                   uint32_t             *id)
722 {
723     ByteIOContext *pb = &matroska->ctx->pb;
724     uint64_t length;
725     MatroskaLevel *level;
726     int res;
727
728     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
729         (res = ebml_read_element_length(matroska, &length)) < 0)
730         return res;
731
732     /* protect... (Heaven forbids that the '>' is true) */
733     if (matroska->num_levels >= EBML_MAX_DEPTH) {
734         av_log(matroska->ctx, AV_LOG_ERROR,
735                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
736         return AVERROR_NOTSUPP;
737     }
738
739     /* remember level */
740     level = &matroska->levels[matroska->num_levels++];
741     level->start = url_ftell(pb);
742     level->length = length;
743
744     return 0;
745 }
746
747 /*
748  * Read the next element as binary data.
749  * 0 is success, < 0 is failure.
750  */
751
752 static int
753 ebml_read_binary (MatroskaDemuxContext *matroska,
754                   uint32_t             *id,
755                   uint8_t             **binary,
756                   int                  *size)
757 {
758     ByteIOContext *pb = &matroska->ctx->pb;
759     uint64_t rlength;
760     int res;
761
762     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
763         (res = ebml_read_element_length(matroska, &rlength)) < 0)
764         return res;
765     *size = rlength;
766
767     if (!(*binary = av_malloc(*size))) {
768         av_log(matroska->ctx, AV_LOG_ERROR,
769                "Memory allocation error\n");
770         return AVERROR_NOMEM;
771     }
772
773     if (get_buffer(pb, *binary, *size) != *size) {
774         offset_t pos = url_ftell(pb);
775         av_log(matroska->ctx, AV_LOG_ERROR,
776                "Read error at pos. %llu (0x%llx)\n", pos, pos);
777         return AVERROR_IO;
778     }
779
780     return 0;
781 }
782
783 /*
784  * Read signed/unsigned "EBML" numbers.
785  * Return: number of bytes processed, < 0 on error.
786  * XXX: use ebml_read_num().
787  */
788
789 static int
790 matroska_ebmlnum_uint (uint8_t  *data,
791                        uint32_t  size,
792                        uint64_t *num)
793 {
794     int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
795     uint64_t total;
796
797     if (size <= 0)
798         return AVERROR_INVALIDDATA;
799
800     total = data[0];
801     while (read <= 8 && !(total & len_mask)) {
802         read++;
803         len_mask >>= 1;
804     }
805     if (read > 8)
806         return AVERROR_INVALIDDATA;
807
808     if ((total &= (len_mask - 1)) == len_mask - 1)
809         num_ffs++;
810     if (size < read)
811         return AVERROR_INVALIDDATA;
812     while (n < read) {
813         if (data[n] == 0xff)
814             num_ffs++;
815         total = (total << 8) | data[n];
816         n++;
817     }
818
819     if (!total)
820         return AVERROR_INVALIDDATA;
821
822     if (read == num_ffs)
823         *num = (uint64_t)-1;
824     else
825         *num = total;
826
827     return read;
828 }
829
/*
 * Parse a signed "EBML number" from a memory buffer. The number is
 * read as unsigned first and then shifted down by half its range;
 * the all-ones value maps to INT64_MAX.
 * Return: number of bytes processed, < 0 on error.
 */

static int
matroska_ebmlnum_sint (uint8_t  *data,
                       uint32_t  size,
                       int64_t  *num)
{
    uint64_t raw;
    int nbytes;

    /* read as unsigned number first */
    nbytes = matroska_ebmlnum_uint(data, size, &raw);
    if (nbytes < 0)
        return nbytes;

    if (raw != (uint64_t) -1) {
        /* an n-byte number has 7*n value bits; subtract the bias
         * 2^(7n-1) - 1 to centre its range around zero */
        int64_t bias = (1LL << ((7 * nbytes) - 1)) - 1;

        *num = raw - bias;
    } else {
        *num = INT64_MAX;
    }

    return nbytes;
}
854
855 /*
856  * Read an EBML header.
857  * 0 is success, < 0 is failure.
858  */
859
860 static int
861 ebml_read_header (MatroskaDemuxContext *matroska,
862                   char                **doctype,
863                   int                  *version)
864 {
865     uint32_t id;
866     int level_up, res = 0;
867
868     /* default init */
869     if (doctype)
870         *doctype = NULL;
871     if (version)
872         *version = 1;
873
874     if (!(id = ebml_peek_id(matroska, &level_up)) ||
875         level_up != 0 || id != EBML_ID_HEADER) {
876         av_log(matroska->ctx, AV_LOG_ERROR,
877                "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
878         return AVERROR_INVALIDDATA;
879     }
880     if ((res = ebml_read_master(matroska, &id)) < 0)
881         return res;
882
883     while (res == 0) {
884         if (!(id = ebml_peek_id(matroska, &level_up)))
885             return AVERROR_IO;
886
887         /* end-of-header */
888         if (level_up)
889             break;
890
891         switch (id) {
892             /* is our read version uptodate? */
893             case EBML_ID_EBMLREADVERSION: {
894                 uint64_t num;
895
896                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
897                     return res;
898                 if (num > EBML_VERSION) {
899                     av_log(matroska->ctx, AV_LOG_ERROR,
900                            "EBML version %llu (> %d) is not supported\n",
901                            num, EBML_VERSION);
902                     return AVERROR_INVALIDDATA;
903                 }
904                 break;
905             }
906
907             /* we only handle 8 byte lengths at max */
908             case EBML_ID_EBMLMAXSIZELENGTH: {
909                 uint64_t num;
910
911                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
912                     return res;
913                 if (num > sizeof(uint64_t)) {
914                     av_log(matroska->ctx, AV_LOG_ERROR,
915                            "Integers of size %llu (> %d) not supported\n",
916                            num, sizeof(uint64_t));
917                     return AVERROR_INVALIDDATA;
918                 }
919                 break;
920             }
921
922             /* we handle 4 byte IDs at max */
923             case EBML_ID_EBMLMAXIDLENGTH: {
924                 uint64_t num;
925
926                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
927                     return res;
928                 if (num > sizeof(uint32_t)) {
929                     av_log(matroska->ctx, AV_LOG_ERROR,
930                            "IDs of size %llu (> %u) not supported\n",
931                             num, sizeof(uint32_t));
932                     return AVERROR_INVALIDDATA;
933                 }
934                 break;
935             }
936
937             case EBML_ID_DOCTYPE: {
938                 char *text;
939
940                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
941                     return res;
942                 if (doctype) {
943                     if (*doctype)
944                         av_free(*doctype);
945                     *doctype = text;
946                 } else
947                     av_free(text);
948                 break;
949             }
950
951             case EBML_ID_DOCTYPEREADVERSION: {
952                 uint64_t num;
953
954                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
955                     return res;
956                 if (version)
957                     *version = num;
958                 break;
959             }
960
961             default:
962                 av_log(matroska->ctx, AV_LOG_INFO,
963                        "Unknown data type 0x%x in EBML header", id);
964                 /* pass-through */
965
966             case EBML_ID_VOID:
967             /* we ignore these two, as they don't tell us anything we
968              * care about */
969             case EBML_ID_EBMLVERSION:
970             case EBML_ID_DOCTYPEVERSION:
971                 res = ebml_read_skip (matroska);
972                 break;
973         }
974     }
975
976     return 0;
977 }
978
979 /*
980  * Put one packet in an application-supplied AVPacket struct.
981  * Returns 0 on success or -1 on failure.
982  */
983
984 static int
985 matroska_deliver_packet (MatroskaDemuxContext *matroska,
986                          AVPacket             *pkt)
987 {
988     if (matroska->num_packets > 0) {
989         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
990         av_free(matroska->packets[0]);
991         if (matroska->num_packets > 1) {
992             memmove(&matroska->packets[0], &matroska->packets[1],
993                     (matroska->num_packets - 1) * sizeof(AVPacket *));
994             matroska->packets =
995                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
996                            sizeof(AVPacket *));
997         } else {
998             av_free(matroska->packets);
999             matroska->packets = NULL;
1000         }
1001         matroska->num_packets--;
1002         return 0;
1003     }
1004
1005     return -1;
1006 }
1007
1008 /*
1009  * Put a packet into our internal queue. Will be delivered to the
1010  * user/application during the next get_packet() call.
1011  */
1012
1013 static void
1014 matroska_queue_packet (MatroskaDemuxContext *matroska,
1015                        AVPacket             *pkt)
1016 {
1017     matroska->packets =
1018         av_realloc(matroska->packets, (matroska->num_packets + 1) *
1019                    sizeof(AVPacket *));
1020     matroska->packets[matroska->num_packets] = pkt;
1021     matroska->num_packets++;
1022 }
1023
1024 /*
1025  * Autodetecting...
1026  */
1027
1028 static int
1029 matroska_probe (AVProbeData *p)
1030 {
1031     uint64_t total = 0;
1032     int len_mask = 0x80, size = 1, n = 1;
1033     uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1034
1035     if (p->buf_size < 5)
1036         return 0;
1037
1038     /* ebml header? */
1039     if ((p->buf[0] << 24 | p->buf[1] << 16 |
1040          p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
1041         return 0;
1042
1043     /* length of header */
1044     total = p->buf[4];
1045     while (size <= 8 && !(total & len_mask)) {
1046         size++;
1047         len_mask >>= 1;
1048     }
1049     if (size > 8)
1050       return 0;
1051     total &= (len_mask - 1);
1052     while (n < size)
1053         total = (total << 8) | p->buf[4 + n++];
1054
1055     /* does the probe data contain the whole header? */
1056     if (p->buf_size < 4 + size + total)
1057       return 0;
1058
1059     /* the header must contain the document type 'matroska'. For now,
1060      * we don't parse the whole header but simply check for the
1061      * availability of that array of characters inside the header.
1062      * Not fully fool-proof, but good enough. */
1063     for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
1064         if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
1065             return AVPROBE_SCORE_MAX;
1066
1067     return 0;
1068 }
1069
1070 /*
1071  * From here on, it's all XML-style DTD stuff... Needs no comments.
1072  */
1073
1074 static int
1075 matroska_parse_info (MatroskaDemuxContext *matroska)
1076 {
1077     int res = 0;
1078     uint32_t id;
1079
1080     av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
1081
1082     while (res == 0) {
1083         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1084             res = AVERROR_IO;
1085             break;
1086         } else if (matroska->level_up) {
1087             matroska->level_up--;
1088             break;
1089         }
1090
1091         switch (id) {
1092             /* cluster timecode */
1093             case MATROSKA_ID_TIMECODESCALE: {
1094                 uint64_t num;
1095                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1096                     break;
1097                 matroska->time_scale = num;
1098                 break;
1099             }
1100
1101             case MATROSKA_ID_DURATION: {
1102                 double num;
1103                 if ((res = ebml_read_float(matroska, &id, &num)) < 0)
1104                     break;
1105                 matroska->duration = num * matroska->time_scale;
1106                 break;
1107             }
1108
1109             case MATROSKA_ID_WRITINGAPP: {
1110                 char *text;
1111                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1112                     break;
1113                 matroska->writing_app = text;
1114                 break;
1115             }
1116
1117             case MATROSKA_ID_MUXINGAPP: {
1118                 char *text;
1119                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1120                     break;
1121                 matroska->muxing_app = text;
1122                 break;
1123             }
1124
1125             case MATROSKA_ID_DATEUTC: {
1126                 int64_t time;
1127                 if ((res = ebml_read_date(matroska, &id, &time)) < 0)
1128                     break;
1129                 matroska->created = time;
1130                 break;
1131             }
1132
1133             default:
1134                 av_log(matroska->ctx, AV_LOG_INFO,
1135                        "Unknown entry 0x%x in info header\n", id);
1136                 /* fall-through */
1137
1138             case EBML_ID_VOID:
1139                 res = ebml_read_skip(matroska);
1140                 break;
1141         }
1142
1143         if (matroska->level_up) {
1144             matroska->level_up--;
1145             break;
1146         }
1147     }
1148
1149     return res;
1150 }
1151
1152 static int
1153 matroska_add_stream (MatroskaDemuxContext *matroska)
1154 {
1155     int res = 0;
1156     uint32_t id;
1157     MatroskaTrack *track;
1158
1159     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1160
1161     /* Allocate a generic track. As soon as we know its type we'll realloc. */
1162     track = av_mallocz(sizeof(MatroskaTrack));
1163     matroska->num_tracks++;
1164
1165     /* start with the master */
1166     if ((res = ebml_read_master(matroska, &id)) < 0)
1167         return res;
1168
1169     /* try reading the trackentry headers */
1170     while (res == 0) {
1171         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1172             res = AVERROR_IO;
1173             break;
1174         } else if (matroska->level_up > 0) {
1175             matroska->level_up--;
1176             break;
1177         }
1178
1179         switch (id) {
1180             /* track number (unique stream ID) */
1181             case MATROSKA_ID_TRACKNUMBER: {
1182                 uint64_t num;
1183                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1184                     break;
1185                 track->num = num;
1186                 break;
1187             }
1188
1189             /* track UID (unique identifier) */
1190             case MATROSKA_ID_TRACKUID: {
1191                 uint64_t num;
1192                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1193                     break;
1194                 track->uid = num;
1195                 break;
1196             }
1197
1198             /* track type (video, audio, combined, subtitle, etc.) */
1199             case MATROSKA_ID_TRACKTYPE: {
1200                 uint64_t num;
1201                 if (track->type != 0) {
1202                     av_log(matroska->ctx, AV_LOG_INFO,
1203                            "More than one tracktype in an entry - skip\n");
1204                     break;
1205                 }
1206                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1207                     break;
1208                 track->type = num;
1209
1210                 /* ok, so we're actually going to reallocate this thing */
1211                 switch (track->type) {
1212                     case MATROSKA_TRACK_TYPE_VIDEO:
1213                         track = (MatroskaTrack *)
1214                             av_realloc(track, sizeof(MatroskaVideoTrack));
1215                         break;
1216                     case MATROSKA_TRACK_TYPE_AUDIO:
1217                         track = (MatroskaTrack *)
1218                             av_realloc(track, sizeof(MatroskaAudioTrack));
1219                         ((MatroskaAudioTrack *)track)->channels = 1;
1220                         ((MatroskaAudioTrack *)track)->samplerate = 8000;
1221                         break;
1222                     case MATROSKA_TRACK_TYPE_SUBTITLE:
1223                         track = (MatroskaTrack *)
1224                             av_realloc(track, sizeof(MatroskaSubtitleTrack));
1225                         break;
1226                     case MATROSKA_TRACK_TYPE_COMPLEX:
1227                     case MATROSKA_TRACK_TYPE_LOGO:
1228                     case MATROSKA_TRACK_TYPE_CONTROL:
1229                     default:
1230                         av_log(matroska->ctx, AV_LOG_INFO,
1231                                "Unknown or unsupported track type 0x%x\n",
1232                                track->type);
1233                         track->type = 0;
1234                         break;
1235                 }
1236                 matroska->tracks[matroska->num_tracks - 1] = track;
1237                 break;
1238             }
1239
1240             /* tracktype specific stuff for video */
1241             case MATROSKA_ID_TRACKVIDEO: {
1242                 MatroskaVideoTrack *videotrack;
1243                 if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1244                     av_log(matroska->ctx, AV_LOG_INFO,
1245                            "video data in non-video track - ignoring\n");
1246                     res = AVERROR_INVALIDDATA;
1247                     break;
1248                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1249                     break;
1250                 videotrack = (MatroskaVideoTrack *)track;
1251
1252                 while (res == 0) {
1253                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1254                         res = AVERROR_IO;
1255                         break;
1256                     } else if (matroska->level_up > 0) {
1257                         matroska->level_up--;
1258                         break;
1259                     }
1260
1261                     switch (id) {
1262                         /* fixme, this should be one-up, but I get it here */
1263                         case MATROSKA_ID_TRACKDEFAULTDURATION: {
1264                             uint64_t num;
1265                             if ((res = ebml_read_uint (matroska, &id,
1266                                                        &num)) < 0)
1267                                 break;
1268                             track->default_duration = num;
1269                             break;
1270                         }
1271
1272                         /* video framerate */
1273                         case MATROSKA_ID_VIDEOFRAMERATE: {
1274                             double num;
1275                             if ((res = ebml_read_float(matroska, &id,
1276                                                        &num)) < 0)
1277                                 break;
1278                             track->default_duration = 1000000000 * (1. / num);
1279                             break;
1280                         }
1281
1282                         /* width of the size to display the video at */
1283                         case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1284                             uint64_t num;
1285                             if ((res = ebml_read_uint(matroska, &id,
1286                                                       &num)) < 0)
1287                                 break;
1288                             videotrack->display_width = num;
1289                             break;
1290                         }
1291
1292                         /* height of the size to display the video at */
1293                         case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1294                             uint64_t num;
1295                             if ((res = ebml_read_uint(matroska, &id,
1296                                                       &num)) < 0)
1297                                 break;
1298                             videotrack->display_height = num;
1299                             break;
1300                         }
1301
1302                         /* width of the video in the file */
1303                         case MATROSKA_ID_VIDEOPIXELWIDTH: {
1304                             uint64_t num;
1305                             if ((res = ebml_read_uint(matroska, &id,
1306                                                       &num)) < 0)
1307                                 break;
1308                             videotrack->pixel_width = num;
1309                             break;
1310                         }
1311
1312                         /* height of the video in the file */
1313                         case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1314                             uint64_t num;
1315                             if ((res = ebml_read_uint(matroska, &id,
1316                                                       &num)) < 0)
1317                                 break;
1318                             videotrack->pixel_height = num;
1319                             break;
1320                         }
1321
1322                         /* whether the video is interlaced */
1323                         case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1324                             uint64_t num;
1325                             if ((res = ebml_read_uint(matroska, &id,
1326                                                       &num)) < 0)
1327                                 break;
1328                             if (num)
1329                                 track->flags |=
1330                                     MATROSKA_VIDEOTRACK_INTERLACED;
1331                             else
1332                                 track->flags &=
1333                                     ~MATROSKA_VIDEOTRACK_INTERLACED;
1334                             break;
1335                         }
1336
1337                         /* stereo mode (whether the video has two streams,
1338                          * where one is for the left eye and the other for
1339                          * the right eye, which creates a 3D-like
1340                          * effect) */
1341                         case MATROSKA_ID_VIDEOSTEREOMODE: {
1342                             uint64_t num;
1343                             if ((res = ebml_read_uint(matroska, &id,
1344                                                       &num)) < 0)
1345                                 break;
1346                             if (num != MATROSKA_EYE_MODE_MONO &&
1347                                 num != MATROSKA_EYE_MODE_LEFT &&
1348                                 num != MATROSKA_EYE_MODE_RIGHT &&
1349                                 num != MATROSKA_EYE_MODE_BOTH) {
1350                                 av_log(matroska->ctx, AV_LOG_INFO,
1351                                        "Ignoring unknown eye mode 0x%x\n",
1352                                        (uint32_t) num);
1353                                 break;
1354                             }
1355                             videotrack->eye_mode = num;
1356                             break;
1357                         }
1358
1359                         /* aspect ratio behaviour */
1360                         case MATROSKA_ID_VIDEOASPECTRATIO: {
1361                             uint64_t num;
1362                             if ((res = ebml_read_uint(matroska, &id,
1363                                                       &num)) < 0)
1364                                 break;
1365                             if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1366                                 num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1367                                 num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1368                                 av_log(matroska->ctx, AV_LOG_INFO,
1369                                        "Ignoring unknown aspect ratio 0x%x\n",
1370                                        (uint32_t) num);
1371                                 break;
1372                             }
1373                             videotrack->ar_mode = num;
1374                             break;
1375                         }
1376
1377                         /* colourspace (only matters for raw video)
1378                          * fourcc */
1379                         case MATROSKA_ID_VIDEOCOLOURSPACE: {
1380                             uint64_t num;
1381                             if ((res = ebml_read_uint(matroska, &id,
1382                                                       &num)) < 0)
1383                                 break;
1384                             videotrack->fourcc = num;
1385                             break;
1386                         }
1387
1388                         default:
1389                             av_log(matroska->ctx, AV_LOG_INFO,
1390                                    "Unknown video track header entry "
1391                                    "0x%x - ignoring\n", id);
1392                             /* pass-through */
1393
1394                         case EBML_ID_VOID:
1395                             res = ebml_read_skip(matroska);
1396                             break;
1397                     }
1398
1399                     if (matroska->level_up) {
1400                         matroska->level_up--;
1401                         break;
1402                     }
1403                 }
1404                 break;
1405             }
1406
1407             /* tracktype specific stuff for audio */
1408             case MATROSKA_ID_TRACKAUDIO: {
1409                 MatroskaAudioTrack *audiotrack;
1410                 if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1411                     av_log(matroska->ctx, AV_LOG_INFO,
1412                            "audio data in non-audio track - ignoring\n");
1413                     res = AVERROR_INVALIDDATA;
1414                     break;
1415                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1416                     break;
1417                 audiotrack = (MatroskaAudioTrack *)track;
1418
1419                 while (res == 0) {
1420                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1421                         res = AVERROR_IO;
1422                         break;
1423                     } else if (matroska->level_up > 0) {
1424                         matroska->level_up--;
1425                         break;
1426                     }
1427
1428                     switch (id) {
1429                         /* samplerate */
1430                         case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1431                             double num;
1432                             if ((res = ebml_read_float(matroska, &id,
1433                                                        &num)) < 0)
1434                                 break;
1435                             audiotrack->samplerate = num;
1436                             break;
1437                         }
1438
1439                             /* bitdepth */
1440                         case MATROSKA_ID_AUDIOBITDEPTH: {
1441                             uint64_t num;
1442                             if ((res = ebml_read_uint(matroska, &id,
1443                                                       &num)) < 0)
1444                                 break;
1445                             audiotrack->bitdepth = num;
1446                             break;
1447                         }
1448
1449                             /* channels */
1450                         case MATROSKA_ID_AUDIOCHANNELS: {
1451                             uint64_t num;
1452                             if ((res = ebml_read_uint(matroska, &id,
1453                                                       &num)) < 0)
1454                                 break;
1455                             audiotrack->channels = num;
1456                             break;
1457                         }
1458
1459                         default:
1460                             av_log(matroska->ctx, AV_LOG_INFO,
1461                                    "Unknown audio track header entry "
1462                                    "0x%x - ignoring\n", id);
1463                             /* pass-through */
1464
1465                         case EBML_ID_VOID:
1466                             res = ebml_read_skip(matroska);
1467                             break;
1468                     }
1469
1470                     if (matroska->level_up) {
1471                         matroska->level_up--;
1472                         break;
1473                     }
1474                 }
1475                 break;
1476             }
1477
1478                 /* codec identifier */
1479             case MATROSKA_ID_CODECID: {
1480                 char *text;
1481                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1482                     break;
1483                 track->codec_id = text;
1484                 break;
1485             }
1486
1487                 /* codec private data */
1488             case MATROSKA_ID_CODECPRIVATE: {
1489                 uint8_t *data;
1490                 int size;
1491                 if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1492                     break;
1493                 track->codec_priv = data;
1494                 track->codec_priv_size = size;
1495                 break;
1496             }
1497
1498                 /* name of the codec */
1499             case MATROSKA_ID_CODECNAME: {
1500                 char *text;
1501                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1502                     break;
1503                 track->codec_name = text;
1504                 break;
1505             }
1506
1507                 /* name of this track */
1508             case MATROSKA_ID_TRACKNAME: {
1509                 char *text;
1510                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1511                     break;
1512                 track->name = text;
1513                 break;
1514             }
1515
1516                 /* language (matters for audio/subtitles, mostly) */
1517             case MATROSKA_ID_TRACKLANGUAGE: {
1518                 char *text;
1519                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1520                     break;
1521                 track->language = text;
1522                 break;
1523             }
1524
1525                 /* whether this is actually used */
1526             case MATROSKA_ID_TRACKFLAGENABLED: {
1527                 uint64_t num;
1528                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1529                     break;
1530                 if (num)
1531                     track->flags |= MATROSKA_TRACK_ENABLED;
1532                 else
1533                     track->flags &= ~MATROSKA_TRACK_ENABLED;
1534                 break;
1535             }
1536
1537                 /* whether it's the default for this track type */
1538             case MATROSKA_ID_TRACKFLAGDEFAULT: {
1539                 uint64_t num;
1540                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1541                     break;
1542                 if (num)
1543                     track->flags |= MATROSKA_TRACK_DEFAULT;
1544                 else
1545                     track->flags &= ~MATROSKA_TRACK_DEFAULT;
1546                 break;
1547             }
1548
1549                 /* lacing (like MPEG, where blocks don't end/start on frame
1550                  * boundaries) */
1551             case MATROSKA_ID_TRACKFLAGLACING: {
1552                 uint64_t num;
1553                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1554                     break;
1555                 if (num)
1556                     track->flags |= MATROSKA_TRACK_LACING;
1557                 else
1558                     track->flags &= ~MATROSKA_TRACK_LACING;
1559                 break;
1560             }
1561
1562                 /* default length (in time) of one data block in this track */
1563             case MATROSKA_ID_TRACKDEFAULTDURATION: {
1564                 uint64_t num;
1565                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1566                     break;
1567                 track->default_duration = num;
1568                 break;
1569             }
1570
1571             default:
1572                 av_log(matroska->ctx, AV_LOG_INFO,
1573                        "Unknown track header entry 0x%x - ignoring\n", id);
1574                 /* pass-through */
1575
1576             case EBML_ID_VOID:
1577             /* we ignore these because they're nothing useful. */
1578             case MATROSKA_ID_CODECINFOURL:
1579             case MATROSKA_ID_CODECDOWNLOADURL:
1580             case MATROSKA_ID_TRACKMINCACHE:
1581             case MATROSKA_ID_TRACKMAXCACHE:
1582                 res = ebml_read_skip(matroska);
1583                 break;
1584         }
1585
1586         if (matroska->level_up) {
1587             matroska->level_up--;
1588             break;
1589         }
1590     }
1591
1592     return res;
1593 }
1594
1595 static int
1596 matroska_parse_tracks (MatroskaDemuxContext *matroska)
1597 {
1598     int res = 0;
1599     uint32_t id;
1600
1601     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1602
1603     while (res == 0) {
1604         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1605             res = AVERROR_IO;
1606             break;
1607         } else if (matroska->level_up) {
1608             matroska->level_up--;
1609             break;
1610         }
1611
1612         switch (id) {
1613             /* one track within the "all-tracks" header */
1614             case MATROSKA_ID_TRACKENTRY:
1615                 res = matroska_add_stream(matroska);
1616                 break;
1617
1618             default:
1619                 av_log(matroska->ctx, AV_LOG_INFO,
1620                        "Unknown entry 0x%x in track header\n", id);
1621                 /* fall-through */
1622
1623             case EBML_ID_VOID:
1624                 res = ebml_read_skip(matroska);
1625                 break;
1626         }
1627
1628         if (matroska->level_up) {
1629             matroska->level_up--;
1630             break;
1631         }
1632     }
1633
1634     return res;
1635 }
1636
1637 static int
1638 matroska_parse_index (MatroskaDemuxContext *matroska)
1639 {
1640     int res = 0;
1641     uint32_t id;
1642     MatroskaDemuxIndex idx;
1643
1644     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1645
1646     while (res == 0) {
1647         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1648             res = AVERROR_IO;
1649             break;
1650         } else if (matroska->level_up) {
1651             matroska->level_up--;
1652             break;
1653         }
1654
1655         switch (id) {
1656             /* one single index entry ('point') */
1657             case MATROSKA_ID_POINTENTRY:
1658                 if ((res = ebml_read_master(matroska, &id)) < 0)
1659                     break;
1660
1661                 /* in the end, we hope to fill one entry with a
1662                  * timestamp, a file position and a tracknum */
1663                 idx.pos   = (uint64_t) -1;
1664                 idx.time  = (uint64_t) -1;
1665                 idx.track = (uint16_t) -1;
1666
1667                 while (res == 0) {
1668                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1669                         res = AVERROR_IO;
1670                         break;
1671                     } else if (matroska->level_up) {
1672                         matroska->level_up--;
1673                         break;
1674                     }
1675
1676                     switch (id) {
1677                         /* one single index entry ('point') */
1678                         case MATROSKA_ID_CUETIME: {
1679                             int64_t time;
1680                             if ((res = ebml_read_uint(matroska, &id,
1681                                                       &time)) < 0)
1682                                 break;
1683                             idx.time = time * matroska->time_scale;
1684                             break;
1685                         }
1686
1687                         /* position in the file + track to which it 
1688                          * belongs */
1689                         case MATROSKA_ID_CUETRACKPOSITION:
1690                             if ((res = ebml_read_master(matroska, &id)) < 0)
1691                                 break;
1692
1693                             while (res == 0) {
1694                                 if (!(id = ebml_peek_id (matroska,
1695                                                     &matroska->level_up))) {
1696                                     res = AVERROR_IO;
1697                                     break;
1698                                 } else if (matroska->level_up) {
1699                                     matroska->level_up--;
1700                                     break;
1701                                 }
1702
1703                                 switch (id) {
1704                                     /* track number */
1705                                     case MATROSKA_ID_CUETRACK: {
1706                                         uint64_t num;
1707                                         if ((res = ebml_read_uint(matroska,
1708                                                           &id, &num)) < 0)
1709                                             break;
1710                                         idx.track = num;
1711                                         break;
1712                                     }
1713
1714                                         /* position in file */
1715                                     case MATROSKA_ID_CUECLUSTERPOSITION: {
1716                                         uint64_t num;
1717                                         if ((res = ebml_read_uint(matroska,
1718                                                           &id, &num)) < 0)
1719                                             break;
1720                                         idx.pos = num;
1721                                         break;
1722                                     }
1723
1724                                     default:
1725                                         av_log(matroska->ctx, AV_LOG_INFO,
1726                                                "Unknown entry 0x%x in "
1727                                                "CuesTrackPositions\n", id);
1728                                         /* fall-through */
1729
1730                                     case EBML_ID_VOID:
1731                                         res = ebml_read_skip(matroska);
1732                                         break;
1733                                 }
1734
1735                                 if (matroska->level_up) {
1736                                     matroska->level_up--;
1737                                     break;
1738                                 }
1739                             }
1740
1741                             break;
1742
1743                         default:
1744                             av_log(matroska->ctx, AV_LOG_INFO,
1745                                    "Unknown entry 0x%x in cuespoint "
1746                                    "index\n", id);
1747                             /* fall-through */
1748
1749                         case EBML_ID_VOID:
1750                             res = ebml_read_skip(matroska);
1751                             break;
1752                     }
1753
1754                     if (matroska->level_up) {
1755                         matroska->level_up--;
1756                         break;
1757                     }
1758                 }
1759
1760                 /* so let's see if we got what we wanted */
1761                 if (idx.pos   != (uint64_t) -1 &&
1762                     idx.time  != (uint64_t) -1 &&
1763                     idx.track != (uint16_t) -1) {
1764                     if (matroska->num_indexes % 32 == 0) {
1765                         /* re-allocate bigger index */
1766                         matroska->index =
1767                             av_realloc(matroska->index,
1768                                        (matroska->num_indexes + 32) *
1769                                        sizeof(MatroskaDemuxIndex));
1770                     }
1771                     matroska->index[matroska->num_indexes] = idx;
1772                     matroska->num_indexes++;
1773                 }
1774                 break;
1775
1776             default:
1777                 av_log(matroska->ctx, AV_LOG_INFO,
1778                        "Unknown entry 0x%x in cues header\n", id);
1779                 /* fall-through */
1780
1781             case EBML_ID_VOID:
1782                 res = ebml_read_skip(matroska);
1783                 break;
1784         }
1785
1786         if (matroska->level_up) {
1787             matroska->level_up--;
1788             break;
1789         }
1790     }
1791
1792     return res;
1793 }
1794
1795 static int
1796 matroska_parse_metadata (MatroskaDemuxContext *matroska)
1797 {
1798     int res = 0;
1799     uint32_t id;
1800
1801     while (res == 0) {
1802         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1803             res = AVERROR_IO;
1804             break;
1805         } else if (matroska->level_up) {
1806             matroska->level_up--;
1807             break;
1808         }
1809
1810         switch (id) {
1811             /* Hm, this is unsupported... */
1812             default:
1813                 av_log(matroska->ctx, AV_LOG_INFO,
1814                        "Unknown entry 0x%x in metadata header\n", id);
1815                 /* fall-through */
1816
1817             case EBML_ID_VOID:
1818                 res = ebml_read_skip(matroska);
1819                 break;
1820         }
1821
1822         if (matroska->level_up) {
1823             matroska->level_up--;
1824             break;
1825         }
1826     }
1827
1828     return res;
1829 }
1830
1831 static int
1832 matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1833 {
1834     int res = 0;
1835     uint32_t id;
1836
1837     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1838
1839     while (res == 0) {
1840         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1841             res = AVERROR_IO;
1842             break;
1843         } else if (matroska->level_up) {
1844             matroska->level_up--;
1845             break;
1846         }
1847
1848         switch (id) {
1849             case MATROSKA_ID_SEEKENTRY: {
1850                 uint32_t seek_id = 0, peek_id_cache = 0;
1851                 uint64_t seek_pos = (uint64_t) -1, t;
1852
1853                 if ((res = ebml_read_master(matroska, &id)) < 0)
1854                     break;
1855
1856                 while (res == 0) {
1857                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1858                         res = AVERROR_IO;
1859                         break;
1860                     } else if (matroska->level_up) {
1861                         matroska->level_up--;
1862                         break;
1863                     }
1864
1865                     switch (id) {
1866                         case MATROSKA_ID_SEEKID:
1867                             res = ebml_read_uint(matroska, &id, &t);
1868                             seek_id = t;
1869                             break;
1870
1871                         case MATROSKA_ID_SEEKPOSITION:
1872                             res = ebml_read_uint(matroska, &id, &seek_pos);
1873                             break;
1874
1875                         default:
1876                             av_log(matroska->ctx, AV_LOG_INFO,
1877                                    "Unknown seekhead ID 0x%x\n", id);
1878                             /* fall-through */
1879
1880                         case EBML_ID_VOID:
1881                             res = ebml_read_skip(matroska);
1882                             break;
1883                     }
1884
1885                     if (matroska->level_up) {
1886                         matroska->level_up--;
1887                         break;
1888                     }
1889                 }
1890
1891                 if (!seek_id || seek_pos == (uint64_t) -1) {
1892                     av_log(matroska->ctx, AV_LOG_INFO,
1893                            "Incomplete seekhead entry (0x%x/%llu)\n",
1894                            seek_id, seek_pos);
1895                     break;
1896                 }
1897
1898                 switch (seek_id) {
1899                     case MATROSKA_ID_CUES:
1900                     case MATROSKA_ID_TAGS: {
1901                         uint32_t level_up = matroska->level_up;
1902                         offset_t before_pos;
1903                         uint64_t length;
1904                         MatroskaLevel level;
1905
1906                         /* remember the peeked ID and the current position */
1907                         peek_id_cache = matroska->peek_id;
1908                         before_pos = url_ftell(&matroska->ctx->pb);
1909
1910                         /* seek */
1911                         if ((res = ebml_read_seek(matroska, seek_pos +
1912                                                matroska->segment_start)) < 0)
1913                             return res;
1914
1915                         /* we don't want to lose our seekhead level, so we add
1916                          * a dummy. This is a crude hack. */
1917                         if (matroska->num_levels == EBML_MAX_DEPTH) {
1918                             av_log(matroska->ctx, AV_LOG_INFO,
1919                                    "Max EBML element depth (%d) reached, "
1920                                    "cannot parse further.\n", EBML_MAX_DEPTH);
1921                             return AVERROR_UNKNOWN;
1922                         }
1923                             
1924                         level.start = 0;
1925                         level.length = (uint64_t)-1;
1926                         matroska->levels[matroska->num_levels] = level;
1927                         matroska->num_levels++;
1928
1929                         /* check ID */
1930                         if (!(id = ebml_peek_id (matroska,
1931                                                  &matroska->level_up)))
1932                             break;
1933                         if (id != seek_id) {
1934                             av_log(matroska->ctx, AV_LOG_INFO,
1935                                    "We looked for ID=0x%x but got "
1936                                    "ID=0x%x (pos=%llu)",
1937                                    seek_id, id, seek_pos +
1938                                    matroska->segment_start);
1939                             goto finish;
1940                         }
1941
1942                         /* read master + parse */
1943                         if ((res = ebml_read_master(matroska, &id)) < 0)
1944                             break;
1945                         switch (id) {
1946                             case MATROSKA_ID_CUES:
1947                                 if (!(res = matroska_parse_index(matroska)) ||
1948                                     url_feof(&matroska->ctx->pb)) {
1949                                     matroska->index_parsed = 1;
1950                                     res = 0;
1951                                 }
1952                                 break;
1953                             case MATROSKA_ID_TAGS:
1954                                 if (!(res = matroska_parse_metadata(matroska)) ||
1955                                    url_feof(&matroska->ctx->pb)) {
1956                                     matroska->metadata_parsed = 1;
1957                                     res = 0;
1958                                 }
1959                                 break;
1960                         }
1961                         if (res < 0)
1962                             break;
1963
1964                     finish:
1965                         /* remove dummy level */
1966                         while (matroska->num_levels) {
1967                             matroska->num_levels--;
1968                             length =
1969                                 matroska->levels[matroska->num_levels].length;
1970                             if (length == (uint64_t)-1)
1971                                 break;
1972                         }
1973
1974                         /* seek back */
1975                         if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1976                             return res;
1977                         matroska->peek_id = peek_id_cache;
1978                         matroska->level_up = level_up;
1979                         break;
1980                     }
1981
1982                     default:
1983                         av_log(matroska->ctx, AV_LOG_INFO,
1984                                "Ignoring seekhead entry for ID=0x%x\n",
1985                                seek_id);
1986                         break;
1987                 }
1988
1989                 break;
1990             }
1991
1992             default:
1993                 av_log(matroska->ctx, AV_LOG_INFO,
1994                        "Unknown seekhead ID 0x%x\n", id);
1995                 /* fall-through */
1996
1997             case EBML_ID_VOID:
1998                 res = ebml_read_skip(matroska);
1999                 break;
2000         }
2001
2002         if (matroska->level_up) {
2003             matroska->level_up--;
2004             break;
2005         }
2006     }
2007
2008     return res;
2009 }
2010
2011 static int
2012 matroska_read_header (AVFormatContext    *s,
2013                       AVFormatParameters *ap)
2014 {
2015     MatroskaDemuxContext *matroska = s->priv_data;
2016     char *doctype;
2017     int version, last_level, res = 0;
2018     uint32_t id;
2019
2020     matroska->ctx = s;
2021
2022     /* First read the EBML header. */
2023     doctype = NULL;
2024     if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2025         return res;
2026     if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2027         av_log(matroska->ctx, AV_LOG_ERROR,
2028                "Wrong EBML doctype ('%s' != 'matroska').\n",
2029                doctype ? doctype : "(none)");
2030         if (doctype)
2031             av_free(doctype);
2032         return AVERROR_NOFMT;
2033     }
2034     av_free(doctype);
2035     if (version != 1) {
2036         av_log(matroska->ctx, AV_LOG_ERROR,
2037                "Matroska demuxer version 1 too old for file version %d\n",
2038                version);
2039         return AVERROR_NOFMT;
2040     }
2041
2042     /* The next thing is a segment. */
2043     while (1) {
2044         if (!(id = ebml_peek_id(matroska, &last_level)))
2045             return AVERROR_IO;
2046         if (id == MATROSKA_ID_SEGMENT)
2047             break;
2048
2049         /* oi! */
2050         av_log(matroska->ctx, AV_LOG_INFO,
2051                "Expected a Segment ID (0x%x), but received 0x%x!\n",
2052                MATROSKA_ID_SEGMENT, id);
2053         if ((res = ebml_read_skip(matroska)) < 0)
2054             return res;
2055     }
2056
2057     /* We now have a Matroska segment.
2058      * Seeks are from the beginning of the segment,
2059      * after the segment ID/length. */
2060     if ((res = ebml_read_master(matroska, &id)) < 0)
2061         return res;
2062     matroska->segment_start = url_ftell(&s->pb);
2063
2064     matroska->time_scale = 1000000;
2065     /* we've found our segment, start reading the different contents in here */
2066     while (res == 0) {
2067         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2068             res = AVERROR_IO;
2069             break;
2070         } else if (matroska->level_up) {
2071             matroska->level_up--;
2072             break;
2073         }
2074
2075         switch (id) {
2076             /* stream info */
2077             case MATROSKA_ID_INFO: {
2078                 if ((res = ebml_read_master(matroska, &id)) < 0)
2079                     break;
2080                 res = matroska_parse_info(matroska);
2081                 break;
2082             }
2083
2084             /* track info headers */
2085             case MATROSKA_ID_TRACKS: {
2086                 if ((res = ebml_read_master(matroska, &id)) < 0)
2087                     break;
2088                 res = matroska_parse_tracks(matroska);
2089                 break;
2090             }
2091
2092             /* stream index */
2093             case MATROSKA_ID_CUES: {
2094                 if (!matroska->index_parsed) {
2095                     if ((res = ebml_read_master(matroska, &id)) < 0)
2096                         break;
2097                     res = matroska_parse_index(matroska);
2098                 } else
2099                     res = ebml_read_skip(matroska);
2100                 break;
2101             }
2102
2103             /* metadata */
2104             case MATROSKA_ID_TAGS: {
2105                 if (!matroska->metadata_parsed) {
2106                     if ((res = ebml_read_master(matroska, &id)) < 0)
2107                         break;
2108                     res = matroska_parse_metadata(matroska);
2109                 } else
2110                     res = ebml_read_skip(matroska);
2111                 break;
2112             }
2113
2114             /* file index (if seekable, seek to Cues/Tags to parse it) */
2115             case MATROSKA_ID_SEEKHEAD: {
2116                 if ((res = ebml_read_master(matroska, &id)) < 0)
2117                     break;
2118                 res = matroska_parse_seekhead(matroska);
2119                 break;
2120             }
2121
2122             case MATROSKA_ID_CLUSTER: {
2123                 /* Do not read the master - this will be done in the next
2124                  * call to matroska_read_packet. */
2125                 res = 1;
2126                 break;
2127             }
2128
2129             default:
2130                 av_log(matroska->ctx, AV_LOG_INFO,
2131                        "Unknown matroska file header ID 0x%x\n", id);
2132             /* fall-through */
2133
2134             case EBML_ID_VOID:
2135                 res = ebml_read_skip(matroska);
2136                 break;
2137         }
2138
2139         if (matroska->level_up) {
2140             matroska->level_up--;
2141             break;
2142         }
2143     }
2144
2145     if (res < 0)
2146         return res;
2147
2148     /* Have we found a cluster? */
2149     if (res == 1) {
2150         int i;
2151         enum CodecID codec_id;
2152         MatroskaTrack *track;
2153         AVStream *st;
2154
2155         av_set_pts_info(s, 24, 1, 1000); /* 24 bit pts in ms */
2156
2157         for (i = 0; i < matroska->num_tracks; i++) {
2158             track = matroska->tracks[i];
2159
2160             /* libavformat does not really support subtitles.
2161              * Also apply some sanity checks. */
2162             if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
2163                 (track->codec_id == NULL))
2164                 continue;
2165
2166             /* Set the FourCC from the CodecID. */
2167             /* This is the MS compatibility mode which stores a
2168              * BITMAPINFOHEADER in the CodecPrivate. */
2169             if (!strcmp(track->codec_id,
2170                         MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2171                 (track->codec_priv_size >= 40) &&
2172                 (track->codec_priv != NULL)) {
2173                 unsigned char *p;
2174
2175                 /* Offset of biCompression. Stored in LE. */
2176                 p = (unsigned char *)track->codec_priv + 16;
2177                 ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
2178                                  (p[2] << 16) | (p[1] << 8) | p[0];
2179                 codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
2180
2181             } else if (!strcmp(track->codec_id,
2182                                MATROSKA_CODEC_ID_VIDEO_MPEG4_SP) ||
2183                        !strcmp(track->codec_id,
2184                                MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP) ||
2185                        !strcmp(track->codec_id,
2186                                MATROSKA_CODEC_ID_VIDEO_MPEG4_AP))
2187                 codec_id = CODEC_ID_MPEG4;
2188 /*             else if (!strcmp(track->codec_id, */
2189 /*                              MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
2190 /*                 codec_id = CODEC_ID_???; */
2191             else if (!strcmp(track->codec_id,
2192                              MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3))
2193                 codec_id = CODEC_ID_MSMPEG4V3;
2194             else if (!strcmp(track->codec_id,
2195                              MATROSKA_CODEC_ID_VIDEO_MPEG1) ||
2196                      !strcmp(track->codec_id,
2197                              MATROSKA_CODEC_ID_VIDEO_MPEG2))
2198                 codec_id = CODEC_ID_MPEG2VIDEO;
2199
2200             /* This is the MS compatibility mode which stores a
2201              * WAVEFORMATEX in the CodecPrivate. */
2202             else if (!strcmp(track->codec_id, 
2203                              MATROSKA_CODEC_ID_AUDIO_ACM) &&
2204                 (track->codec_priv_size >= 18) &&
2205                 (track->codec_priv != NULL)) {
2206                 unsigned char *p;
2207                 uint16_t tag;
2208
2209                 /* Offset of wFormatTag. Stored in LE. */
2210                 p = (unsigned char *)track->codec_priv;
2211                 tag = (p[1] << 8) | p[0];
2212                 codec_id = codec_get_wav_id(tag);
2213
2214             } else if (!strcmp(track->codec_id,
2215                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L1) ||
2216                        !strcmp(track->codec_id,
2217                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L2) ||
2218                        !strcmp(track->codec_id,
2219                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L3))
2220                 codec_id = CODEC_ID_MP3;
2221             else if (!strcmp(track->codec_id,
2222                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE))
2223                 codec_id = CODEC_ID_PCM_U16BE;
2224             else if (!strcmp(track->codec_id,
2225                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE))
2226                 codec_id = CODEC_ID_PCM_U16LE;
2227 /*             else if (!strcmp(track->codec_id, */
2228 /*                              MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
2229 /*                 codec_id = CODEC_ID_PCM_???; */
2230             else if (!strcmp(track->codec_id,
2231                              MATROSKA_CODEC_ID_AUDIO_AC3))
2232                 codec_id = CODEC_ID_AC3;
2233             /* No such codec id so far. */
2234 /*             else if (!strcmp(track->codec_id, */
2235 /*                              MATROSKA_CODEC_ID_AUDIO_DTS)) */
2236 /*                 codec_id = CODEC_ID_DTS; */
2237             else if (!strcmp(track->codec_id,
2238                              MATROSKA_CODEC_ID_AUDIO_VORBIS))
2239                 codec_id = CODEC_ID_VORBIS;
2240             else if (!strcmp(track->codec_id,
2241                              MATROSKA_CODEC_ID_AUDIO_MPEG2) ||
2242                      !strcmp(track->codec_id,
2243                              MATROSKA_CODEC_ID_AUDIO_MPEG4))
2244                 codec_id = CODEC_ID_AAC;
2245             else
2246                 codec_id = CODEC_ID_NONE;
2247
2248             if (codec_id == CODEC_ID_NONE) {
2249                 av_log(matroska->ctx, AV_LOG_INFO,
2250                        "Unknown/unsupported CodecID %s.\n",
2251                        track->codec_id);
2252             }
2253
2254             track->stream_index = matroska->num_streams;
2255
2256             matroska->num_streams++;
2257             st = av_new_stream(s, track->stream_index);
2258             if (st == NULL)
2259                 return AVERROR_NOMEM;
2260
2261             st->codec.codec_id = codec_id;
2262
2263             if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2264                 MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2265
2266                 st->codec.codec_type = CODEC_TYPE_VIDEO;
2267                 st->codec.codec_tag = videotrack->fourcc;
2268                 st->codec.width = videotrack->pixel_width;
2269                 st->codec.height = videotrack->pixel_height;
2270                 if (videotrack->display_width == 0)
2271                     st->codec.sample_aspect_ratio.num =
2272                         videotrack->pixel_width;
2273                 else
2274                     st->codec.sample_aspect_ratio.num =
2275                         videotrack->display_width;
2276                 if (videotrack->display_height == 0)
2277                     st->codec.sample_aspect_ratio.num =
2278                         videotrack->pixel_height;
2279                 else
2280                     st->codec.sample_aspect_ratio.num =
2281                         videotrack->display_height;
2282
2283             } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2284                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2285
2286                 st->codec.codec_type = CODEC_TYPE_AUDIO;
2287                 st->codec.sample_rate = audiotrack->samplerate;
2288                 st->codec.channels = audiotrack->channels;
2289             }
2290
2291             /* What do we do with private data? E.g. for Vorbis. */
2292         }
2293     }
2294
2295     return 0;
2296 }
2297
2298 static int
2299 matroska_find_track_by_num (MatroskaDemuxContext *matroska,
2300                             int                   num)
2301 {
2302     int i;
2303
2304     for (i = 0; i < matroska->num_tracks; i++)
2305         if (matroska->tracks[i]->num == num)
2306             return i;
2307
2308     return -1;
2309 }
2310
2311 static int
2312 matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2313                            uint64_t              cluster_time)
2314 {
2315     int res = 0;
2316     uint32_t id;
2317     AVPacket *pkt;
2318     int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2319
2320     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2321
2322     while (res == 0) {
2323         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2324             res = AVERROR_IO;
2325             break;
2326         } else if (matroska->level_up) {
2327             matroska->level_up--;
2328             break;
2329         }
2330
2331         switch (id) {
2332             /* one block inside the group. Note, block parsing is one
2333              * of the harder things, so this code is a bit complicated.
2334              * See http://www.matroska.org/ for documentation. */
2335             case MATROSKA_ID_BLOCK: {
2336                 uint8_t *data, *origdata;
2337                 int size;
2338                 uint64_t time;
2339                 uint32_t *lace_size = NULL;
2340                 int n, track, flags, laces = 0;
2341                 uint64_t num;
2342
2343                 if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
2344                     break;
2345                 origdata = data;
2346
2347                 /* first byte(s): blocknum */
2348                 if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2349                     av_log(matroska->ctx, AV_LOG_ERROR,
2350                            "EBML block data error\n");
2351                     av_free(origdata);
2352                     break;
2353                 }
2354                 data += n;
2355                 size -= n;
2356
2357                 /* fetch track from num */
2358                 track = matroska_find_track_by_num(matroska, num);
2359                 if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2360                     av_log(matroska->ctx, AV_LOG_INFO,
2361                            "Invalid stream %d or size %u\n", track, size);
2362                     av_free(origdata);
2363                     break;
2364                 }
2365
2366                 /* time (relative to cluster time) */
2367                 time = ((data[0] << 8) | data[1]) * matroska->time_scale;
2368                 data += 2;
2369                 size -= 2;
2370                 flags = *data;
2371                 data += 1;
2372                 size -= 1;
2373                 switch ((flags & 0x06) >> 1) {
2374                     case 0x0: /* no lacing */
2375                         laces = 1;
2376                         lace_size = av_mallocz(sizeof(int));
2377                         lace_size[0] = size;
2378                         break;
2379
2380                     case 0x1: /* xiph lacing */
2381                     case 0x2: /* fixed-size lacing */
2382                     case 0x3: /* EBML lacing */
2383                         if (size == 0) {
2384                             res = -1;
2385                             break;
2386                         }
2387                         laces = (*data) + 1;
2388                         data += 1;
2389                         size -= 1;
2390                         lace_size = av_mallocz(laces * sizeof(int));
2391
2392                         switch ((flags & 0x06) >> 1) {
2393                             case 0x1: /* xiph lacing */ {
2394                                 uint8_t temp;
2395                                 uint32_t total = 0;
2396                                 for (n = 0; res == 0 && n < laces - 1; n++) {
2397                                     while (1) {
2398                                         if (size == 0) {
2399                                             res = -1;
2400                                             break;
2401                                         }
2402                                         temp = *data;
2403                                         lace_size[n] += temp;
2404                                         data += 1;
2405                                         size -= 1;
2406                                         if (temp != 0xff)
2407                                             break;
2408                                     }
2409                                     total += lace_size[n];
2410                                 }
2411                                 lace_size[n] = size - total;
2412                                 break;
2413                             }
2414
2415                             case 0x2: /* fixed-size lacing */
2416                                 for (n = 0; n < laces; n++)
2417                                     lace_size[n] = size / laces;
2418                                 break;
2419
2420                             case 0x3: /* EBML lacing */ {
2421                                 uint32_t total;
2422                                 n = matroska_ebmlnum_uint(data, size, &num);
2423                                 if (n < 0) {
2424                                     av_log(matroska->ctx, AV_LOG_INFO,
2425                                            "EBML block data error\n");
2426                                     break;
2427                                 }
2428                                 data += n;
2429                                 size -= n;
2430                                 total = lace_size[0] = num;
2431                                 for (n = 1; res == 0 && n < laces - 1; n++) {
2432                                     int64_t snum;
2433                                     int r;
2434                                     r = matroska_ebmlnum_sint (data, size,
2435                                                                &snum);
2436                                     if (r < 0) {
2437                                         av_log(matroska->ctx, AV_LOG_INFO,
2438                                                "EBML block data error\n");
2439                                         break;
2440                                     }
2441                                     data += r;
2442                                     size -= r;
2443                                     lace_size[n] = lace_size[n - 1] + snum;
2444                                     total += lace_size[n];
2445                                 }
2446                                 lace_size[n] = size - total;
2447                                 break;
2448                             }
2449                         }
2450                         break;
2451                 }
2452
2453                 if (res == 0) {
2454                     for (n = 0; n < laces; n++) {
2455                         uint64_t timecode = 0;
2456
2457                         pkt = av_mallocz(sizeof(AVPacket));
2458                         /* XXX: prevent data copy... */
2459                         if (av_new_packet(pkt,lace_size[n]) < 0) {
2460                             res = AVERROR_NOMEM;
2461                             break;
2462                         }
2463                         if (cluster_time != (uint64_t)-1) {
2464                             if (time < 0 && (-time) > cluster_time)
2465                                 timecode = cluster_time;
2466                             else
2467                                 timecode = cluster_time + time;
2468                         }
2469                         /* FIXME: duration */
2470
2471                         memcpy(pkt->data, data, lace_size[n]);
2472                         data += lace_size[n];
2473                         if (n == 0)
2474                             pkt->flags = is_keyframe;
2475                         pkt->stream_index =
2476                             matroska->tracks[track]->stream_index;
2477
2478                         pkt->pts = timecode / 1000000; /* ns to ms */
2479
2480                         matroska_queue_packet(matroska, pkt);
2481                     }
2482                 }
2483
2484                 av_free(lace_size);
2485                 av_free(origdata);
2486                 break;
2487             }
2488
2489             case MATROSKA_ID_BLOCKDURATION: {
2490                 uint64_t num;
2491                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2492                     break;
2493                 av_log(matroska->ctx, AV_LOG_INFO,
2494                        "FIXME: implement support for BlockDuration\n");
2495                 break;
2496             }
2497
2498             case MATROSKA_ID_BLOCKREFERENCE:
2499                 /* We've found a reference, so not even the first frame in
2500                  * the lace is a key frame. */
2501                 is_keyframe = 0;
2502                 if (last_num_packets != matroska->num_packets)
2503                     matroska->packets[last_num_packets]->flags = 0;
2504                 res = ebml_read_skip(matroska);
2505                 break;
2506
2507             default:
2508                 av_log(matroska->ctx, AV_LOG_INFO,
2509                        "Unknown entry 0x%x in blockgroup data\n", id);
2510                 /* fall-through */
2511
2512             case EBML_ID_VOID:
2513                 res = ebml_read_skip(matroska);
2514                 break;
2515         }
2516
2517         if (matroska->level_up) {
2518             matroska->level_up--;
2519             break;
2520         }
2521     }
2522
2523     return res;
2524 }
2525
2526 static int
2527 matroska_parse_cluster (MatroskaDemuxContext *matroska)
2528 {
2529     int res = 0;
2530     uint32_t id;
2531     uint64_t cluster_time = 0;
2532
2533     av_log(matroska->ctx, AV_LOG_DEBUG,
2534            "parsing cluster at %lld\n", url_ftell(&matroska->ctx->pb));
2535
2536     while (res == 0) {
2537         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2538             res = AVERROR_IO;
2539             break;
2540         } else if (matroska->level_up) {
2541             matroska->level_up--;
2542             break;
2543         }
2544
2545         switch (id) {
2546             /* cluster timecode */
2547             case MATROSKA_ID_CLUSTERTIMECODE: {
2548                 uint64_t num;
2549                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2550                     break;
2551                 cluster_time = num * matroska->time_scale;
2552                 break;
2553             }
2554
2555                 /* a group of blocks inside a cluster */
2556             case MATROSKA_ID_BLOCKGROUP:
2557                 if ((res = ebml_read_master(matroska, &id)) < 0)
2558                     break;
2559                 res = matroska_parse_blockgroup(matroska, cluster_time);
2560                 break;
2561
2562             default:
2563                 av_log(matroska->ctx, AV_LOG_INFO,
2564                        "Unknown entry 0x%x in cluster data\n", id);
2565                 /* fall-through */
2566
2567             case EBML_ID_VOID:
2568                 res = ebml_read_skip(matroska);
2569                 break;
2570         }
2571
2572         if (matroska->level_up) {
2573             matroska->level_up--;
2574             break;
2575         }
2576     }
2577
2578     return res;
2579 }
2580
2581 static int
2582 matroska_read_packet (AVFormatContext *s,
2583                       AVPacket        *pkt)
2584 {
2585     MatroskaDemuxContext *matroska = s->priv_data;
2586     int res = 0;
2587     uint32_t id;
2588
2589     /* Do we still have a packet queued? */
2590     if (matroska_deliver_packet(matroska, pkt) == 0)
2591         return 0;
2592
2593     /* Have we already reached the end? */
2594     if (matroska->done)
2595         return AVERROR_IO;
2596
2597     while (res == 0) {
2598         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2599             res = AVERROR_IO;
2600             break;
2601         } else if (matroska->level_up) {
2602             matroska->level_up--;
2603             break;
2604         }
2605
2606         switch (id) {
2607             case MATROSKA_ID_CLUSTER:
2608                 if ((res = ebml_read_master(matroska, &id)) < 0)
2609                     break;
2610                 if ((res = matroska_parse_cluster(matroska)) == 0)
2611                     res = 1; /* Parsed one cluster, let's get out. */
2612                 break;
2613
2614             default:
2615             case EBML_ID_VOID:
2616                 res = ebml_read_skip(matroska);
2617                 break;
2618         }
2619
2620         if (matroska->level_up) {
2621             matroska->level_up--;
2622             break;
2623         }
2624     }
2625
2626     if (res == -1)
2627         matroska->done = 1;
2628
2629     return matroska_deliver_packet(matroska, pkt);
2630 }
2631
2632 static int
2633 matroska_read_close (AVFormatContext *s)
2634 {
2635     MatroskaDemuxContext *matroska = s->priv_data;
2636     int n = 0;
2637
2638     if (matroska->writing_app)
2639         av_free(matroska->writing_app);
2640     if (matroska->muxing_app)
2641         av_free(matroska->muxing_app);
2642     if (matroska->index)
2643         av_free(matroska->index);
2644
2645     if (matroska->packets != NULL) {
2646         for (n = 0; n < matroska->num_packets; n++) {
2647             av_free_packet(matroska->packets[n]);
2648             av_free(matroska->packets[n]);
2649         }
2650         av_free(matroska->packets);
2651     }
2652
2653     for (n = 0; n < matroska->num_tracks; n++) {
2654         MatroskaTrack *track = matroska->tracks[n];
2655         if (track->codec_id)
2656             av_free(track->codec_id);
2657         if (track->codec_name)
2658             av_free(track->codec_name);
2659         if (track->codec_priv)
2660             av_free(track->codec_priv);
2661         if (track->name)
2662             av_free(track->name);
2663         if (track->language)
2664             av_free(track->language);
2665
2666         av_free(track);
2667     }
2668
2669     memset(matroska, 0, sizeof(MatroskaDemuxContext));
2670
2671     return 0;
2672 }
2673
2674 static AVInputFormat matroska_iformat = {
2675     "matroska",
2676     "Matroska file format",
2677     sizeof(MatroskaDemuxContext),
2678     matroska_probe,
2679     matroska_read_header,
2680     matroska_read_packet,
2681     matroska_read_close,
2682 };
2683
2684 int
2685 matroska_init(void)
2686 {
2687     av_register_input_format(&matroska_iformat);
2688     return 0;
2689 }