]> git.sesse.net Git - ffmpeg/blob - libavformat/matroska.c
add missing #includes
[ffmpeg] / libavformat / matroska.c
1 /*
2  * Matroska file demuxer (no muxer yet)
3  * Copyright (c) 2003-2004 The ffmpeg Project
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 /**
21  * @file matroska.c
22  * Matroska file demuxer
23  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
24  * with a little help from Moritz Bunkus <moritz@bunkus.org>
25  * Specs available on the matroska project page:
26  * http://www.matroska.org/.
27  */
28
29 #include "avformat.h"
30 /* For codec_get_bmp_id and codec_get_wav_id. */
31 #include "avi.h"
32 #include "intfloat_readwrite.h"
33
/* Highest EBML read version accepted by the header parser. */
#define EBML_VERSION                 1

/* ID of the top-level EBML header master element. */
#define EBML_ID_HEADER               0x1A45DFA3

/* IDs of the elements found inside the EBML header master. */
#define EBML_ID_EBMLVERSION          0x4286
#define EBML_ID_EBMLREADVERSION      0x42F7
#define EBML_ID_EBMLMAXIDLENGTH      0x42F2
#define EBML_ID_EBMLMAXSIZELENGTH    0x42F3
#define EBML_ID_DOCTYPE              0x4282
#define EBML_ID_DOCTYPEVERSION       0x4287
#define EBML_ID_DOCTYPEREADVERSION   0x4285

/* General EBML element types. */
#define EBML_ID_VOID                 0xEC
51
/*
 * Matroska element IDs. None of them is wider than 32 bits.
 * The groups below follow the nesting of the master elements.
 */

/* The top-level segment. */
#define MATROSKA_ID_SEGMENT              0x18538067

/* Master elements directly inside the segment. */
#define MATROSKA_ID_INFO                 0x1549A966
#define MATROSKA_ID_TRACKS               0x1654AE6B
#define MATROSKA_ID_CUES                 0x1C53BB6B
#define MATROSKA_ID_TAGS                 0x1254C367
#define MATROSKA_ID_SEEKHEAD             0x114D9B74
#define MATROSKA_ID_CLUSTER              0x1F43B675

/* Children of the info master. */
#define MATROSKA_ID_TIMECODESCALE        0x2AD7B1
#define MATROSKA_ID_DURATION             0x4489
#define MATROSKA_ID_WRITINGAPP           0x5741
#define MATROSKA_ID_MUXINGAPP            0x4D80
#define MATROSKA_ID_DATEUTC              0x4461

/* Child of the tracks master. */
#define MATROSKA_ID_TRACKENTRY           0xAE

/* Children of the trackentry master. */
#define MATROSKA_ID_TRACKNUMBER          0xD7
#define MATROSKA_ID_TRACKUID             0x73C5
#define MATROSKA_ID_TRACKTYPE            0x83
#define MATROSKA_ID_TRACKAUDIO           0xE1
#define MATROSKA_ID_TRACKVIDEO           0xE0
#define MATROSKA_ID_CODECID              0x86
#define MATROSKA_ID_CODECPRIVATE         0x63A2
#define MATROSKA_ID_CODECNAME            0x258688
#define MATROSKA_ID_CODECINFOURL         0x3B4040
#define MATROSKA_ID_CODECDOWNLOADURL     0x26B240
#define MATROSKA_ID_TRACKNAME            0x536E
#define MATROSKA_ID_TRACKLANGUAGE        0x22B59C
#define MATROSKA_ID_TRACKFLAGENABLED     0xB9
#define MATROSKA_ID_TRACKFLAGDEFAULT     0x88
#define MATROSKA_ID_TRACKFLAGLACING      0x9C
#define MATROSKA_ID_TRACKMINCACHE        0x6DE7
#define MATROSKA_ID_TRACKMAXCACHE        0x6DF8
#define MATROSKA_ID_TRACKDEFAULTDURATION 0x23E383

/* Children of the trackvideo master. */
#define MATROSKA_ID_VIDEOFRAMERATE       0x2383E3
#define MATROSKA_ID_VIDEODISPLAYWIDTH    0x54B0
#define MATROSKA_ID_VIDEODISPLAYHEIGHT   0x54BA
#define MATROSKA_ID_VIDEOPIXELWIDTH      0xB0
#define MATROSKA_ID_VIDEOPIXELHEIGHT     0xBA
#define MATROSKA_ID_VIDEOFLAGINTERLACED  0x9A
#define MATROSKA_ID_VIDEOSTEREOMODE      0x53B9
#define MATROSKA_ID_VIDEOASPECTRATIO     0x54B3
#define MATROSKA_ID_VIDEOCOLOURSPACE     0x2EB524

/* Children of the trackaudio master. */
#define MATROSKA_ID_AUDIOSAMPLINGFREQ    0xB5
#define MATROSKA_ID_AUDIOBITDEPTH        0x6264
#define MATROSKA_ID_AUDIOCHANNELS        0x9F

/* Child of the cues master. */
#define MATROSKA_ID_POINTENTRY           0xBB

/* Children of the pointentry master. */
#define MATROSKA_ID_CUETIME              0xB3
#define MATROSKA_ID_CUETRACKPOSITION     0xB7

/* Children of the cuetrackposition master. */
#define MATROSKA_ID_CUETRACK             0xF7
#define MATROSKA_ID_CUECLUSTERPOSITION   0xF1

/* Children of the tags master. */
/* TODO */

/* Child of the seekhead master. */
#define MATROSKA_ID_SEEKENTRY            0x4DBB

/* Children of the seekpoint master. */
#define MATROSKA_ID_SEEKID               0x53AB
#define MATROSKA_ID_SEEKPOSITION         0x53AC

/* Children of the cluster master. */
#define MATROSKA_ID_CLUSTERTIMECODE      0xE7
#define MATROSKA_ID_BLOCKGROUP           0xA0

/* Children of the blockgroup master. */
#define MATROSKA_ID_BLOCK                0xA1
#define MATROSKA_ID_BLOCKDURATION        0x9B
#define MATROSKA_ID_BLOCKREFERENCE       0xFB
142
/* Kinds of track, stored in MatroskaTrack.type. */
typedef enum {
    MATROSKA_TRACK_TYPE_VIDEO    = 0x01,
    MATROSKA_TRACK_TYPE_AUDIO    = 0x02,
    MATROSKA_TRACK_TYPE_COMPLEX  = 0x03,
    MATROSKA_TRACK_TYPE_LOGO     = 0x10,
    MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
    MATROSKA_TRACK_TYPE_CONTROL  = 0x20,
} MatroskaTrackType;
151
/* Eye-mode values, stored in MatroskaVideoTrack.eye_mode. */
typedef enum {
    MATROSKA_EYE_MODE_MONO  = 0,
    MATROSKA_EYE_MODE_RIGHT = 1,
    MATROSKA_EYE_MODE_LEFT  = 2,
    MATROSKA_EYE_MODE_BOTH  = 3,
} MatroskaEyeMode;
158
/* Aspect-ratio modes, stored in MatroskaVideoTrack.ar_mode. */
typedef enum {
    MATROSKA_ASPECT_RATIO_MODE_FREE  = 0,
    MATROSKA_ASPECT_RATIO_MODE_KEEP  = 1,
    MATROSKA_ASPECT_RATIO_MODE_FIXED = 2,
} MatroskaAspectRatioMode;
164
/*
 * The flag sets below are not part of the Matroska format itself;
 * they are internal bookkeeping for the muxer/demuxer.
 */

/* Flags common to every track. MATROSKA_TRACK_SHIFT is the first bit
 * available to the track-type specific flag sets. */
typedef enum {
    MATROSKA_TRACK_ENABLED = 1 << 0,
    MATROSKA_TRACK_DEFAULT = 1 << 1,
    MATROSKA_TRACK_LACING  = 1 << 2,
    MATROSKA_TRACK_SHIFT   = 1 << 16
} MatroskaTrackFlags;

/* Flags specific to video tracks, placed above the common flags. */
typedef enum {
    MATROSKA_VIDEOTRACK_INTERLACED = MATROSKA_TRACK_SHIFT << 0
} MatroskaVideoTrackFlags;
180
/*
 * Matroska codec IDs. These are strings, not numbers.
 * Each macro is defined exactly once.
 */

/* Video codecs. */
#define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC   "V_MS/VFW/FOURCC"
#define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP     "V_MPEG4/ISO/SP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP    "V_MPEG4/ISO/ASP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP     "V_MPEG4/ISO/AP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC    "V_MPEG4/ISO/AVC"
#define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3    "V_MPEG4/MS/V3"
#define MATROSKA_CODEC_ID_VIDEO_MPEG1        "V_MPEG1"
#define MATROSKA_CODEC_ID_VIDEO_MPEG2        "V_MPEG2"
#define MATROSKA_CODEC_ID_VIDEO_MJPEG        "V_MJPEG"
/* TODO: Real/Quicktime */

/* Audio codecs. */
#define MATROSKA_CODEC_ID_AUDIO_ACM          "A_MS/ACM"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1     "A_MPEG/L1"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2     "A_MPEG/L2"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3     "A_MPEG/L3"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE   "A_PCM/INT/BIG"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE   "A_PCM/INT/LIT"
#define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT    "A_PCM/FLOAT/IEEE"
#define MATROSKA_CODEC_ID_AUDIO_AC3          "A_AC3"
#define MATROSKA_CODEC_ID_AUDIO_DTS          "A_DTS"
#define MATROSKA_CODEC_ID_AUDIO_VORBIS       "A_VORBIS"
#define MATROSKA_CODEC_ID_AUDIO_MPEG2        "A_AAC/MPEG2/"
#define MATROSKA_CODEC_ID_AUDIO_MPEG4        "A_AAC/MPEG4/"
/* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */
211
/* Maximum nesting depth of master elements that the level stack
 * (MatroskaDemuxContext.levels) can hold. */
#define EBML_MAX_DEPTH               16
214
215 typedef struct Track {
216     MatroskaTrackType type;
217
218     /* Unique track number and track ID. stream_index is the index that
219      * the calling app uses for this track. */
220     uint32_t num,
221         uid,
222         stream_index;
223
224     char *name,
225         *language;
226
227     char *codec_id,
228         *codec_name;
229
230     unsigned char *codec_priv;
231     int codec_priv_size;
232
233     int64_t default_duration;
234     MatroskaTrackFlags flags;
235 } MatroskaTrack;
236
237 typedef struct MatroskaVideoTrack {
238     MatroskaTrack track;
239
240     int pixel_width,
241         pixel_height,
242         display_width,
243         display_height;
244
245     uint32_t fourcc;
246
247     MatroskaAspectRatioMode ar_mode;
248     MatroskaEyeMode eye_mode;
249
250     //..
251 } MatroskaVideoTrack;
252
253 typedef struct MatroskaAudioTrack {
254     MatroskaTrack track;
255
256     int channels,
257         bitdepth,
258         samplerate;
259     //..
260 } MatroskaAudioTrack;
261
262 typedef struct MatroskaSubtitleTrack {
263     MatroskaTrack track;
264
265     //..
266 } MatroskaSubtitleTrack;
267
/* One entry of the stack of currently open master elements. */
typedef struct MatroskaLevel {
    uint64_t start;    /* stream position just after the element header */
    uint64_t length;   /* content length of the element in bytes */
} MatroskaLevel;
271
/* One entry of the seeking index. */
typedef struct MatroskaDemuxIndex {
    /* position of the corresponding *cluster* (not of the block) */
    uint64_t pos;
    /* track the entry belongs to; refers to MatroskaTrack.num */
    uint16_t track;
    /* time of the entry, in nanoseconds */
    uint64_t time;
} MatroskaDemuxIndex;
277
278 typedef struct MatroskaDemuxContext {
279     AVFormatContext *ctx;
280
281     /* ebml stuff */
282     int num_levels;
283     MatroskaLevel levels[EBML_MAX_DEPTH];
284     int level_up;
285
286     /* matroska stuff */
287     char *writing_app,
288         *muxing_app;
289     int64_t created;
290
291     /* timescale in the file */
292     int64_t time_scale;
293
294     /* length, position (time, ns) */
295     int64_t duration,
296         pos;
297
298     /* num_streams is the number of streams that av_new_stream() was called
299      * for ( = that are available to the calling program). */
300     int num_tracks, num_streams;
301     MatroskaTrack *tracks[MAX_STREAMS];
302
303     /* cache for ID peeking */
304     uint32_t peek_id;
305
306     /* byte position of the segment inside the stream */
307     offset_t segment_start;
308
309     /* The packet queue. */
310     AVPacket **packets;
311     int num_packets;
312
313     /* have we already parse metadata/cues/clusters? */
314     int metadata_parsed,
315         index_parsed,
316         done;
317
318     /* The index for seeking. */
319     int num_indexes;
320     MatroskaDemuxIndex *index;
321 } MatroskaDemuxContext;
322
323 /*
324  * The first few functions handle EBML file parsing. The rest
325  * is the document interpretation. Matroska really just is a
326  * EBML file.
327  */
328
329 /*
330  * Return: the amount of levels in the hierarchy that the
331  * current element lies higher than the previous one.
332  * The opposite isn't done - that's auto-done using master
333  * element reading.
334  */
335
336 static int
337 ebml_read_element_level_up (MatroskaDemuxContext *matroska)
338 {
339     ByteIOContext *pb = &matroska->ctx->pb;
340     offset_t pos = url_ftell(pb);
341     int num = 0;
342
343     while (matroska->num_levels > 0) {
344         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
345
346         if (pos >= level->start + level->length) {
347             matroska->num_levels--;
348             num++;
349         } else {
350             break;
351         }
352     }
353
354     return num;
355 }
356
357 /*
358  * Read: an "EBML number", which is defined as a variable-length
359  * array of bytes. The first byte indicates the length by giving a
360  * number of 0-bits followed by a one. The position of the first
361  * "one" bit inside the first byte indicates the length of this
362  * number.
363  * Returns: num. of bytes read. < 0 on error.
364  */
365
366 static int
367 ebml_read_num (MatroskaDemuxContext *matroska,
368                int                   max_size,
369                uint64_t             *number)
370 {
371     ByteIOContext *pb = &matroska->ctx->pb;
372     int len_mask = 0x80, read = 1, n = 1;
373     int64_t total = 0;
374
375     /* the first byte tells us the length in bytes - get_byte() can normally
376      * return 0, but since that's not a valid first ebmlID byte, we can
377      * use it safely here to catch EOS. */
378     if (!(total = get_byte(pb))) {
379         /* we might encounter EOS here */
380         if (!url_feof(pb)) {
381             offset_t pos = url_ftell(pb);
382             av_log(matroska->ctx, AV_LOG_ERROR,
383                    "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
384                    pos, pos);
385         }
386         return AVERROR_IO; /* EOS or actual I/O error */
387     }
388
389     /* get the length of the EBML number */
390     while (read <= max_size && !(total & len_mask)) {
391         read++;
392         len_mask >>= 1;
393     }
394     if (read > max_size) {
395         offset_t pos = url_ftell(pb) - 1;
396         av_log(matroska->ctx, AV_LOG_ERROR,
397                "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
398                (uint8_t) total, pos, pos);
399         return AVERROR_INVALIDDATA;
400     }
401
402     /* read out length */
403     total &= ~len_mask;
404     while (n++ < read)
405         total = (total << 8) | get_byte(pb);
406
407     *number = total;
408
409     return read;
410 }
411
412 /*
413  * Read: the element content data ID.
414  * Return: the number of bytes read or < 0 on error.
415  */
416
417 static int
418 ebml_read_element_id (MatroskaDemuxContext *matroska,
419                       uint32_t             *id,
420                       int                  *level_up)
421 {
422     int read;
423     uint64_t total;
424
425     /* if we re-call this, use our cached ID */
426     if (matroska->peek_id != 0) {
427         if (level_up)
428             *level_up = 0;
429         *id = matroska->peek_id;
430         return 0;
431     }
432
433     /* read out the "EBML number", include tag in ID */
434     if ((read = ebml_read_num(matroska, 4, &total)) < 0)
435         return read;
436     *id = matroska->peek_id  = total | (1 << (read * 7));
437
438     /* level tracking */
439     if (level_up)
440         *level_up = ebml_read_element_level_up(matroska);
441
442     return read;
443 }
444
445 /*
446  * Read: element content length.
447  * Return: the number of bytes read or < 0 on error.
448  */
449
450 static int
451 ebml_read_element_length (MatroskaDemuxContext *matroska,
452                           uint64_t             *length)
453 {
454     /* clear cache since we're now beyond that data point */
455     matroska->peek_id = 0;
456
457     /* read out the "EBML number", include tag in ID */
458     return ebml_read_num(matroska, 8, length);
459 }
460
461 /*
462  * Return: the ID of the next element, or 0 on error.
463  * Level_up contains the amount of levels that this
464  * next element lies higher than the previous one.
465  */
466
467 static uint32_t
468 ebml_peek_id (MatroskaDemuxContext *matroska,
469               int                  *level_up)
470 {
471     uint32_t id;
472
473     assert(level_up != NULL);
474
475     if (ebml_read_element_id(matroska, &id, level_up) < 0)
476         return 0;
477
478     return id;
479 }
480
481 /*
482  * Seek to a given offset.
483  * 0 is success, -1 is failure.
484  */
485
486 static int
487 ebml_read_seek (MatroskaDemuxContext *matroska,
488                 offset_t              offset)
489 {
490     ByteIOContext *pb = &matroska->ctx->pb;
491
492     /* clear ID cache, if any */
493     matroska->peek_id = 0;
494
495     return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
496 }
497
498 /*
499  * Skip the next element.
500  * 0 is success, -1 is failure.
501  */
502
503 static int
504 ebml_read_skip (MatroskaDemuxContext *matroska)
505 {
506     ByteIOContext *pb = &matroska->ctx->pb;
507     uint32_t id;
508     uint64_t length;
509     int res;
510
511     if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
512         (res = ebml_read_element_length(matroska, &length)) < 0)
513         return res;
514
515     url_fskip(pb, length);
516
517     return 0;
518 }
519
520 /*
521  * Read the next element as an unsigned int.
522  * 0 is success, < 0 is failure.
523  */
524
525 static int
526 ebml_read_uint (MatroskaDemuxContext *matroska,
527                 uint32_t             *id,
528                 uint64_t             *num)
529 {
530     ByteIOContext *pb = &matroska->ctx->pb;
531     int n = 0, size, res;
532     uint64_t rlength;
533
534     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
535         (res = ebml_read_element_length(matroska, &rlength)) < 0)
536         return res;
537     size = rlength;
538     if (size < 1 || size > 8) {
539         offset_t pos = url_ftell(pb);
540         av_log(matroska->ctx, AV_LOG_ERROR,
541                "Invalid uint element size %d at position %"PRId64" (0x%"PRIx64")\n",
542                 size, pos, pos);
543         return AVERROR_INVALIDDATA;
544     }
545
546     /* big-endian ordening; build up number */
547     *num = 0;
548     while (n++ < size)
549         *num = (*num << 8) | get_byte(pb);
550
551     return 0;
552 }
553
554 /*
555  * Read the next element as a signed int.
556  * 0 is success, < 0 is failure.
557  */
558
559 static int
560 ebml_read_sint (MatroskaDemuxContext *matroska,
561                 uint32_t             *id,
562                 int64_t              *num)
563 {
564     ByteIOContext *pb = &matroska->ctx->pb;
565     int size, n = 1, negative = 0, res;
566     uint64_t rlength;
567
568     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
569         (res = ebml_read_element_length(matroska, &rlength)) < 0)
570         return res;
571     size = rlength;
572     if (size < 1 || size > 8) {
573         offset_t pos = url_ftell(pb);
574         av_log(matroska->ctx, AV_LOG_ERROR,
575                "Invalid sint element size %d at position %"PRId64" (0x%"PRIx64")\n",
576                 size, pos, pos);
577         return AVERROR_INVALIDDATA;
578     }
579     if ((*num = get_byte(pb)) & 0x80) {
580         negative = 1;
581         *num &= ~0x80;
582     }
583     *num = 0;
584     while (n++ < size)
585         *num = (*num << 8) | get_byte(pb);
586
587     /* make signed */
588     if (negative)
589         *num = *num - (1LL << ((8 * size) - 1));
590
591     return 0;
592 }
593
594 /*
595  * Read the next element as a float.
596  * 0 is success, < 0 is failure.
597  */
598
599 static int
600 ebml_read_float (MatroskaDemuxContext *matroska,
601                  uint32_t             *id,
602                  double               *num)
603 {
604     ByteIOContext *pb = &matroska->ctx->pb;
605     int size, res;
606     uint64_t rlength;
607
608     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
609         (res = ebml_read_element_length(matroska, &rlength)) < 0)
610         return res;
611     size = rlength;
612
613     if (size == 4) {
614         *num= av_int2flt(get_be32(pb));
615     } else if(size==8){
616         *num= av_int2dbl(get_be64(pb));
617     } else if(size==10){
618         av_log(matroska->ctx, AV_LOG_ERROR,
619                "FIXME! 10-byte floats unimplemented\n");
620         return AVERROR_UNKNOWN;
621     } else{
622         offset_t pos = url_ftell(pb);
623         av_log(matroska->ctx, AV_LOG_ERROR,
624                "Invalid float element size %d at position %"PRIu64" (0x%"PRIx64")\n",
625                size, pos, pos);
626         return AVERROR_INVALIDDATA;
627     }
628
629     return 0;
630 }
631
632 /*
633  * Read the next element as an ASCII string.
634  * 0 is success, < 0 is failure.
635  */
636
637 static int
638 ebml_read_ascii (MatroskaDemuxContext *matroska,
639                  uint32_t             *id,
640                  char                **str)
641 {
642     ByteIOContext *pb = &matroska->ctx->pb;
643     int size, res;
644     uint64_t rlength;
645
646     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
647         (res = ebml_read_element_length(matroska, &rlength)) < 0)
648         return res;
649     size = rlength;
650
651     /* ebml strings are usually not 0-terminated, so we allocate one
652      * byte more, read the string and NULL-terminate it ourselves. */
653     if (size < 0 || !(*str = av_malloc(size + 1))) {
654         av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
655         return AVERROR_NOMEM;
656     }
657     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
658         offset_t pos = url_ftell(pb);
659         av_log(matroska->ctx, AV_LOG_ERROR,
660                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
661         return AVERROR_IO;
662     }
663     (*str)[size] = '\0';
664
665     return 0;
666 }
667
668 /*
669  * Read the next element as a UTF-8 string.
670  * 0 is success, < 0 is failure.
671  */
672
673 static int
674 ebml_read_utf8 (MatroskaDemuxContext *matroska,
675                 uint32_t             *id,
676                 char                **str)
677 {
678   return ebml_read_ascii(matroska, id, str);
679 }
680
681 /*
682  * Read the next element as a date (nanoseconds since 1/1/2000).
683  * 0 is success, < 0 is failure.
684  */
685
686 static int
687 ebml_read_date (MatroskaDemuxContext *matroska,
688                 uint32_t             *id,
689                 int64_t              *date)
690 {
691   return ebml_read_sint(matroska, id, date);
692 }
693
694 /*
695  * Read the next element, but only the header. The contents
696  * are supposed to be sub-elements which can be read separately.
697  * 0 is success, < 0 is failure.
698  */
699
700 static int
701 ebml_read_master (MatroskaDemuxContext *matroska,
702                   uint32_t             *id)
703 {
704     ByteIOContext *pb = &matroska->ctx->pb;
705     uint64_t length;
706     MatroskaLevel *level;
707     int res;
708
709     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
710         (res = ebml_read_element_length(matroska, &length)) < 0)
711         return res;
712
713     /* protect... (Heaven forbids that the '>' is true) */
714     if (matroska->num_levels >= EBML_MAX_DEPTH) {
715         av_log(matroska->ctx, AV_LOG_ERROR,
716                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
717         return AVERROR_NOTSUPP;
718     }
719
720     /* remember level */
721     level = &matroska->levels[matroska->num_levels++];
722     level->start = url_ftell(pb);
723     level->length = length;
724
725     return 0;
726 }
727
728 /*
729  * Read the next element as binary data.
730  * 0 is success, < 0 is failure.
731  */
732
733 static int
734 ebml_read_binary (MatroskaDemuxContext *matroska,
735                   uint32_t             *id,
736                   uint8_t             **binary,
737                   int                  *size)
738 {
739     ByteIOContext *pb = &matroska->ctx->pb;
740     uint64_t rlength;
741     int res;
742
743     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
744         (res = ebml_read_element_length(matroska, &rlength)) < 0)
745         return res;
746     *size = rlength;
747
748     if (!(*binary = av_malloc(*size))) {
749         av_log(matroska->ctx, AV_LOG_ERROR,
750                "Memory allocation error\n");
751         return AVERROR_NOMEM;
752     }
753
754     if (get_buffer(pb, *binary, *size) != *size) {
755         offset_t pos = url_ftell(pb);
756         av_log(matroska->ctx, AV_LOG_ERROR,
757                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
758         return AVERROR_IO;
759     }
760
761     return 0;
762 }
763
764 /*
765  * Read signed/unsigned "EBML" numbers.
766  * Return: number of bytes processed, < 0 on error.
767  * XXX: use ebml_read_num().
768  */
769
770 static int
771 matroska_ebmlnum_uint (uint8_t  *data,
772                        uint32_t  size,
773                        uint64_t *num)
774 {
775     int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
776     uint64_t total;
777
778     if (size <= 0)
779         return AVERROR_INVALIDDATA;
780
781     total = data[0];
782     while (read <= 8 && !(total & len_mask)) {
783         read++;
784         len_mask >>= 1;
785     }
786     if (read > 8)
787         return AVERROR_INVALIDDATA;
788
789     if ((total &= (len_mask - 1)) == len_mask - 1)
790         num_ffs++;
791     if (size < read)
792         return AVERROR_INVALIDDATA;
793     while (n < read) {
794         if (data[n] == 0xff)
795             num_ffs++;
796         total = (total << 8) | data[n];
797         n++;
798     }
799
800     if (read == num_ffs)
801         *num = (uint64_t)-1;
802     else
803         *num = total;
804
805     return read;
806 }
807
/*
 * Decode a signed "EBML number" from a memory buffer.
 *
 * The number is first decoded as unsigned and then shifted down by
 * half of its range. The all-ones value is reported as INT64_MAX.
 * Return: the number of bytes processed, < 0 on error.
 */
static int matroska_ebmlnum_sint(uint8_t *data, uint32_t size, int64_t *num)
{
    uint64_t unum;
    int len;

    /* read as an unsigned number first */
    len = matroska_ebmlnum_uint(data, size, &unum);
    if (len < 0)
        return len;

    /* make signed (weird way) */
    if (unum != (uint64_t) -1)
        *num = unum - ((1LL << ((7 * len) - 1)) - 1);
    else
        *num = INT64_MAX;

    return len;
}
832
833 /*
834  * Read an EBML header.
835  * 0 is success, < 0 is failure.
836  */
837
838 static int
839 ebml_read_header (MatroskaDemuxContext *matroska,
840                   char                **doctype,
841                   int                  *version)
842 {
843     uint32_t id;
844     int level_up, res = 0;
845
846     /* default init */
847     if (doctype)
848         *doctype = NULL;
849     if (version)
850         *version = 1;
851
852     if (!(id = ebml_peek_id(matroska, &level_up)) ||
853         level_up != 0 || id != EBML_ID_HEADER) {
854         av_log(matroska->ctx, AV_LOG_ERROR,
855                "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
856         return AVERROR_INVALIDDATA;
857     }
858     if ((res = ebml_read_master(matroska, &id)) < 0)
859         return res;
860
861     while (res == 0) {
862         if (!(id = ebml_peek_id(matroska, &level_up)))
863             return AVERROR_IO;
864
865         /* end-of-header */
866         if (level_up)
867             break;
868
869         switch (id) {
870             /* is our read version uptodate? */
871             case EBML_ID_EBMLREADVERSION: {
872                 uint64_t num;
873
874                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
875                     return res;
876                 if (num > EBML_VERSION) {
877                     av_log(matroska->ctx, AV_LOG_ERROR,
878                            "EBML version %"PRIu64" (> %d) is not supported\n",
879                            num, EBML_VERSION);
880                     return AVERROR_INVALIDDATA;
881                 }
882                 break;
883             }
884
885             /* we only handle 8 byte lengths at max */
886             case EBML_ID_EBMLMAXSIZELENGTH: {
887                 uint64_t num;
888
889                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
890                     return res;
891                 if (num > sizeof(uint64_t)) {
892                     av_log(matroska->ctx, AV_LOG_ERROR,
893                            "Integers of size %"PRIu64" (> %zd) not supported\n",
894                            num, sizeof(uint64_t));
895                     return AVERROR_INVALIDDATA;
896                 }
897                 break;
898             }
899
900             /* we handle 4 byte IDs at max */
901             case EBML_ID_EBMLMAXIDLENGTH: {
902                 uint64_t num;
903
904                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
905                     return res;
906                 if (num > sizeof(uint32_t)) {
907                     av_log(matroska->ctx, AV_LOG_ERROR,
908                            "IDs of size %"PRIu64" (> %zu) not supported\n",
909                             num, sizeof(uint32_t));
910                     return AVERROR_INVALIDDATA;
911                 }
912                 break;
913             }
914
915             case EBML_ID_DOCTYPE: {
916                 char *text;
917
918                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
919                     return res;
920                 if (doctype) {
921                     if (*doctype)
922                         av_free(*doctype);
923                     *doctype = text;
924                 } else
925                     av_free(text);
926                 break;
927             }
928
929             case EBML_ID_DOCTYPEREADVERSION: {
930                 uint64_t num;
931
932                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
933                     return res;
934                 if (version)
935                     *version = num;
936                 break;
937             }
938
939             default:
940                 av_log(matroska->ctx, AV_LOG_INFO,
941                        "Unknown data type 0x%x in EBML header", id);
942                 /* pass-through */
943
944             case EBML_ID_VOID:
945             /* we ignore these two, as they don't tell us anything we
946              * care about */
947             case EBML_ID_EBMLVERSION:
948             case EBML_ID_DOCTYPEVERSION:
949                 res = ebml_read_skip (matroska);
950                 break;
951         }
952     }
953
954     return 0;
955 }
956
957 /*
958  * Put one packet in an application-supplied AVPacket struct.
959  * Returns 0 on success or -1 on failure.
960  */
961
962 static int
963 matroska_deliver_packet (MatroskaDemuxContext *matroska,
964                          AVPacket             *pkt)
965 {
966     if (matroska->num_packets > 0) {
967         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
968         av_free(matroska->packets[0]);
969         if (matroska->num_packets > 1) {
970             memmove(&matroska->packets[0], &matroska->packets[1],
971                     (matroska->num_packets - 1) * sizeof(AVPacket *));
972             matroska->packets =
973                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
974                            sizeof(AVPacket *));
975         } else {
976             av_free(matroska->packets);
977             matroska->packets = NULL;
978         }
979         matroska->num_packets--;
980         return 0;
981     }
982
983     return -1;
984 }
985
986 /*
987  * Put a packet into our internal queue. Will be delivered to the
988  * user/application during the next get_packet() call.
989  */
990
991 static void
992 matroska_queue_packet (MatroskaDemuxContext *matroska,
993                        AVPacket             *pkt)
994 {
995     matroska->packets =
996         av_realloc(matroska->packets, (matroska->num_packets + 1) *
997                    sizeof(AVPacket *));
998     matroska->packets[matroska->num_packets] = pkt;
999     matroska->num_packets++;
1000 }
1001
1002 /*
1003  * Autodetecting...
1004  */
1005
1006 static int
1007 matroska_probe (AVProbeData *p)
1008 {
1009     uint64_t total = 0;
1010     int len_mask = 0x80, size = 1, n = 1;
1011     uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1012
1013     if (p->buf_size < 5)
1014         return 0;
1015
1016     /* ebml header? */
1017     if ((p->buf[0] << 24 | p->buf[1] << 16 |
1018          p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
1019         return 0;
1020
1021     /* length of header */
1022     total = p->buf[4];
1023     while (size <= 8 && !(total & len_mask)) {
1024         size++;
1025         len_mask >>= 1;
1026     }
1027     if (size > 8)
1028       return 0;
1029     total &= (len_mask - 1);
1030     while (n < size)
1031         total = (total << 8) | p->buf[4 + n++];
1032
1033     /* does the probe data contain the whole header? */
1034     if (p->buf_size < 4 + size + total)
1035       return 0;
1036
1037     /* the header must contain the document type 'matroska'. For now,
1038      * we don't parse the whole header but simply check for the
1039      * availability of that array of characters inside the header.
1040      * Not fully fool-proof, but good enough. */
1041     for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
1042         if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
1043             return AVPROBE_SCORE_MAX;
1044
1045     return 0;
1046 }
1047
1048 /*
1049  * From here on, it's all XML-style DTD stuff... Needs no comments.
1050  */
1051
1052 static int
1053 matroska_parse_info (MatroskaDemuxContext *matroska)
1054 {
1055     int res = 0;
1056     uint32_t id;
1057
1058     av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
1059
1060     while (res == 0) {
1061         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1062             res = AVERROR_IO;
1063             break;
1064         } else if (matroska->level_up) {
1065             matroska->level_up--;
1066             break;
1067         }
1068
1069         switch (id) {
1070             /* cluster timecode */
1071             case MATROSKA_ID_TIMECODESCALE: {
1072                 uint64_t num;
1073                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1074                     break;
1075                 matroska->time_scale = num;
1076                 break;
1077             }
1078
1079             case MATROSKA_ID_DURATION: {
1080                 double num;
1081                 if ((res = ebml_read_float(matroska, &id, &num)) < 0)
1082                     break;
1083                 matroska->duration = num * matroska->time_scale;
1084                 break;
1085             }
1086
1087             case MATROSKA_ID_WRITINGAPP: {
1088                 char *text;
1089                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1090                     break;
1091                 matroska->writing_app = text;
1092                 break;
1093             }
1094
1095             case MATROSKA_ID_MUXINGAPP: {
1096                 char *text;
1097                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1098                     break;
1099                 matroska->muxing_app = text;
1100                 break;
1101             }
1102
1103             case MATROSKA_ID_DATEUTC: {
1104                 int64_t time;
1105                 if ((res = ebml_read_date(matroska, &id, &time)) < 0)
1106                     break;
1107                 matroska->created = time;
1108                 break;
1109             }
1110
1111             default:
1112                 av_log(matroska->ctx, AV_LOG_INFO,
1113                        "Unknown entry 0x%x in info header\n", id);
1114                 /* fall-through */
1115
1116             case EBML_ID_VOID:
1117                 res = ebml_read_skip(matroska);
1118                 break;
1119         }
1120
1121         if (matroska->level_up) {
1122             matroska->level_up--;
1123             break;
1124         }
1125     }
1126
1127     return res;
1128 }
1129
1130 static int
1131 matroska_add_stream (MatroskaDemuxContext *matroska)
1132 {
1133     int res = 0;
1134     uint32_t id;
1135     MatroskaTrack *track;
1136
1137     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1138
1139     /* Allocate a generic track. As soon as we know its type we'll realloc. */
1140     track = av_mallocz(sizeof(MatroskaTrack));
1141     matroska->num_tracks++;
1142
1143     /* start with the master */
1144     if ((res = ebml_read_master(matroska, &id)) < 0)
1145         return res;
1146
1147     /* try reading the trackentry headers */
1148     while (res == 0) {
1149         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1150             res = AVERROR_IO;
1151             break;
1152         } else if (matroska->level_up > 0) {
1153             matroska->level_up--;
1154             break;
1155         }
1156
1157         switch (id) {
1158             /* track number (unique stream ID) */
1159             case MATROSKA_ID_TRACKNUMBER: {
1160                 uint64_t num;
1161                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1162                     break;
1163                 track->num = num;
1164                 break;
1165             }
1166
1167             /* track UID (unique identifier) */
1168             case MATROSKA_ID_TRACKUID: {
1169                 uint64_t num;
1170                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1171                     break;
1172                 track->uid = num;
1173                 break;
1174             }
1175
1176             /* track type (video, audio, combined, subtitle, etc.) */
1177             case MATROSKA_ID_TRACKTYPE: {
1178                 uint64_t num;
1179                 if (track->type != 0) {
1180                     av_log(matroska->ctx, AV_LOG_INFO,
1181                            "More than one tracktype in an entry - skip\n");
1182                     break;
1183                 }
1184                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1185                     break;
1186                 track->type = num;
1187
1188                 /* ok, so we're actually going to reallocate this thing */
1189                 switch (track->type) {
1190                     case MATROSKA_TRACK_TYPE_VIDEO:
1191                         track = (MatroskaTrack *)
1192                             av_realloc(track, sizeof(MatroskaVideoTrack));
1193                         break;
1194                     case MATROSKA_TRACK_TYPE_AUDIO:
1195                         track = (MatroskaTrack *)
1196                             av_realloc(track, sizeof(MatroskaAudioTrack));
1197                         ((MatroskaAudioTrack *)track)->channels = 1;
1198                         ((MatroskaAudioTrack *)track)->samplerate = 8000;
1199                         break;
1200                     case MATROSKA_TRACK_TYPE_SUBTITLE:
1201                         track = (MatroskaTrack *)
1202                             av_realloc(track, sizeof(MatroskaSubtitleTrack));
1203                         break;
1204                     case MATROSKA_TRACK_TYPE_COMPLEX:
1205                     case MATROSKA_TRACK_TYPE_LOGO:
1206                     case MATROSKA_TRACK_TYPE_CONTROL:
1207                     default:
1208                         av_log(matroska->ctx, AV_LOG_INFO,
1209                                "Unknown or unsupported track type 0x%x\n",
1210                                track->type);
1211                         track->type = 0;
1212                         break;
1213                 }
1214                 matroska->tracks[matroska->num_tracks - 1] = track;
1215                 break;
1216             }
1217
1218             /* tracktype specific stuff for video */
1219             case MATROSKA_ID_TRACKVIDEO: {
1220                 MatroskaVideoTrack *videotrack;
1221                 if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1222                     av_log(matroska->ctx, AV_LOG_INFO,
1223                            "video data in non-video track - ignoring\n");
1224                     res = AVERROR_INVALIDDATA;
1225                     break;
1226                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1227                     break;
1228                 videotrack = (MatroskaVideoTrack *)track;
1229
1230                 while (res == 0) {
1231                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1232                         res = AVERROR_IO;
1233                         break;
1234                     } else if (matroska->level_up > 0) {
1235                         matroska->level_up--;
1236                         break;
1237                     }
1238
1239                     switch (id) {
1240                         /* fixme, this should be one-up, but I get it here */
1241                         case MATROSKA_ID_TRACKDEFAULTDURATION: {
1242                             uint64_t num;
1243                             if ((res = ebml_read_uint (matroska, &id,
1244                                                        &num)) < 0)
1245                                 break;
1246                             track->default_duration = num;
1247                             break;
1248                         }
1249
1250                         /* video framerate */
1251                         case MATROSKA_ID_VIDEOFRAMERATE: {
1252                             double num;
1253                             if ((res = ebml_read_float(matroska, &id,
1254                                                        &num)) < 0)
1255                                 break;
1256                             track->default_duration = 1000000000 * (1. / num);
1257                             break;
1258                         }
1259
1260                         /* width of the size to display the video at */
1261                         case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1262                             uint64_t num;
1263                             if ((res = ebml_read_uint(matroska, &id,
1264                                                       &num)) < 0)
1265                                 break;
1266                             videotrack->display_width = num;
1267                             break;
1268                         }
1269
1270                         /* height of the size to display the video at */
1271                         case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1272                             uint64_t num;
1273                             if ((res = ebml_read_uint(matroska, &id,
1274                                                       &num)) < 0)
1275                                 break;
1276                             videotrack->display_height = num;
1277                             break;
1278                         }
1279
1280                         /* width of the video in the file */
1281                         case MATROSKA_ID_VIDEOPIXELWIDTH: {
1282                             uint64_t num;
1283                             if ((res = ebml_read_uint(matroska, &id,
1284                                                       &num)) < 0)
1285                                 break;
1286                             videotrack->pixel_width = num;
1287                             break;
1288                         }
1289
1290                         /* height of the video in the file */
1291                         case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1292                             uint64_t num;
1293                             if ((res = ebml_read_uint(matroska, &id,
1294                                                       &num)) < 0)
1295                                 break;
1296                             videotrack->pixel_height = num;
1297                             break;
1298                         }
1299
1300                         /* whether the video is interlaced */
1301                         case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1302                             uint64_t num;
1303                             if ((res = ebml_read_uint(matroska, &id,
1304                                                       &num)) < 0)
1305                                 break;
1306                             if (num)
1307                                 track->flags |=
1308                                     MATROSKA_VIDEOTRACK_INTERLACED;
1309                             else
1310                                 track->flags &=
1311                                     ~MATROSKA_VIDEOTRACK_INTERLACED;
1312                             break;
1313                         }
1314
1315                         /* stereo mode (whether the video has two streams,
1316                          * where one is for the left eye and the other for
1317                          * the right eye, which creates a 3D-like
1318                          * effect) */
1319                         case MATROSKA_ID_VIDEOSTEREOMODE: {
1320                             uint64_t num;
1321                             if ((res = ebml_read_uint(matroska, &id,
1322                                                       &num)) < 0)
1323                                 break;
1324                             if (num != MATROSKA_EYE_MODE_MONO &&
1325                                 num != MATROSKA_EYE_MODE_LEFT &&
1326                                 num != MATROSKA_EYE_MODE_RIGHT &&
1327                                 num != MATROSKA_EYE_MODE_BOTH) {
1328                                 av_log(matroska->ctx, AV_LOG_INFO,
1329                                        "Ignoring unknown eye mode 0x%x\n",
1330                                        (uint32_t) num);
1331                                 break;
1332                             }
1333                             videotrack->eye_mode = num;
1334                             break;
1335                         }
1336
1337                         /* aspect ratio behaviour */
1338                         case MATROSKA_ID_VIDEOASPECTRATIO: {
1339                             uint64_t num;
1340                             if ((res = ebml_read_uint(matroska, &id,
1341                                                       &num)) < 0)
1342                                 break;
1343                             if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1344                                 num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1345                                 num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1346                                 av_log(matroska->ctx, AV_LOG_INFO,
1347                                        "Ignoring unknown aspect ratio 0x%x\n",
1348                                        (uint32_t) num);
1349                                 break;
1350                             }
1351                             videotrack->ar_mode = num;
1352                             break;
1353                         }
1354
1355                         /* colourspace (only matters for raw video)
1356                          * fourcc */
1357                         case MATROSKA_ID_VIDEOCOLOURSPACE: {
1358                             uint64_t num;
1359                             if ((res = ebml_read_uint(matroska, &id,
1360                                                       &num)) < 0)
1361                                 break;
1362                             videotrack->fourcc = num;
1363                             break;
1364                         }
1365
1366                         default:
1367                             av_log(matroska->ctx, AV_LOG_INFO,
1368                                    "Unknown video track header entry "
1369                                    "0x%x - ignoring\n", id);
1370                             /* pass-through */
1371
1372                         case EBML_ID_VOID:
1373                             res = ebml_read_skip(matroska);
1374                             break;
1375                     }
1376
1377                     if (matroska->level_up) {
1378                         matroska->level_up--;
1379                         break;
1380                     }
1381                 }
1382                 break;
1383             }
1384
1385             /* tracktype specific stuff for audio */
1386             case MATROSKA_ID_TRACKAUDIO: {
1387                 MatroskaAudioTrack *audiotrack;
1388                 if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1389                     av_log(matroska->ctx, AV_LOG_INFO,
1390                            "audio data in non-audio track - ignoring\n");
1391                     res = AVERROR_INVALIDDATA;
1392                     break;
1393                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1394                     break;
1395                 audiotrack = (MatroskaAudioTrack *)track;
1396
1397                 while (res == 0) {
1398                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1399                         res = AVERROR_IO;
1400                         break;
1401                     } else if (matroska->level_up > 0) {
1402                         matroska->level_up--;
1403                         break;
1404                     }
1405
1406                     switch (id) {
1407                         /* samplerate */
1408                         case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1409                             double num;
1410                             if ((res = ebml_read_float(matroska, &id,
1411                                                        &num)) < 0)
1412                                 break;
1413                             audiotrack->samplerate = num;
1414                             break;
1415                         }
1416
1417                             /* bitdepth */
1418                         case MATROSKA_ID_AUDIOBITDEPTH: {
1419                             uint64_t num;
1420                             if ((res = ebml_read_uint(matroska, &id,
1421                                                       &num)) < 0)
1422                                 break;
1423                             audiotrack->bitdepth = num;
1424                             break;
1425                         }
1426
1427                             /* channels */
1428                         case MATROSKA_ID_AUDIOCHANNELS: {
1429                             uint64_t num;
1430                             if ((res = ebml_read_uint(matroska, &id,
1431                                                       &num)) < 0)
1432                                 break;
1433                             audiotrack->channels = num;
1434                             break;
1435                         }
1436
1437                         default:
1438                             av_log(matroska->ctx, AV_LOG_INFO,
1439                                    "Unknown audio track header entry "
1440                                    "0x%x - ignoring\n", id);
1441                             /* pass-through */
1442
1443                         case EBML_ID_VOID:
1444                             res = ebml_read_skip(matroska);
1445                             break;
1446                     }
1447
1448                     if (matroska->level_up) {
1449                         matroska->level_up--;
1450                         break;
1451                     }
1452                 }
1453                 break;
1454             }
1455
1456                 /* codec identifier */
1457             case MATROSKA_ID_CODECID: {
1458                 char *text;
1459                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1460                     break;
1461                 track->codec_id = text;
1462                 break;
1463             }
1464
1465                 /* codec private data */
1466             case MATROSKA_ID_CODECPRIVATE: {
1467                 uint8_t *data;
1468                 int size;
1469                 if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1470                     break;
1471                 track->codec_priv = data;
1472                 track->codec_priv_size = size;
1473                 break;
1474             }
1475
1476                 /* name of the codec */
1477             case MATROSKA_ID_CODECNAME: {
1478                 char *text;
1479                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1480                     break;
1481                 track->codec_name = text;
1482                 break;
1483             }
1484
1485                 /* name of this track */
1486             case MATROSKA_ID_TRACKNAME: {
1487                 char *text;
1488                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1489                     break;
1490                 track->name = text;
1491                 break;
1492             }
1493
1494                 /* language (matters for audio/subtitles, mostly) */
1495             case MATROSKA_ID_TRACKLANGUAGE: {
1496                 char *text;
1497                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1498                     break;
1499                 track->language = text;
1500                 break;
1501             }
1502
1503                 /* whether this is actually used */
1504             case MATROSKA_ID_TRACKFLAGENABLED: {
1505                 uint64_t num;
1506                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1507                     break;
1508                 if (num)
1509                     track->flags |= MATROSKA_TRACK_ENABLED;
1510                 else
1511                     track->flags &= ~MATROSKA_TRACK_ENABLED;
1512                 break;
1513             }
1514
1515                 /* whether it's the default for this track type */
1516             case MATROSKA_ID_TRACKFLAGDEFAULT: {
1517                 uint64_t num;
1518                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1519                     break;
1520                 if (num)
1521                     track->flags |= MATROSKA_TRACK_DEFAULT;
1522                 else
1523                     track->flags &= ~MATROSKA_TRACK_DEFAULT;
1524                 break;
1525             }
1526
1527                 /* lacing (like MPEG, where blocks don't end/start on frame
1528                  * boundaries) */
1529             case MATROSKA_ID_TRACKFLAGLACING: {
1530                 uint64_t num;
1531                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1532                     break;
1533                 if (num)
1534                     track->flags |= MATROSKA_TRACK_LACING;
1535                 else
1536                     track->flags &= ~MATROSKA_TRACK_LACING;
1537                 break;
1538             }
1539
1540                 /* default length (in time) of one data block in this track */
1541             case MATROSKA_ID_TRACKDEFAULTDURATION: {
1542                 uint64_t num;
1543                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1544                     break;
1545                 track->default_duration = num;
1546                 break;
1547             }
1548
1549             default:
1550                 av_log(matroska->ctx, AV_LOG_INFO,
1551                        "Unknown track header entry 0x%x - ignoring\n", id);
1552                 /* pass-through */
1553
1554             case EBML_ID_VOID:
1555             /* we ignore these because they're nothing useful. */
1556             case MATROSKA_ID_CODECINFOURL:
1557             case MATROSKA_ID_CODECDOWNLOADURL:
1558             case MATROSKA_ID_TRACKMINCACHE:
1559             case MATROSKA_ID_TRACKMAXCACHE:
1560                 res = ebml_read_skip(matroska);
1561                 break;
1562         }
1563
1564         if (matroska->level_up) {
1565             matroska->level_up--;
1566             break;
1567         }
1568     }
1569
1570     return res;
1571 }
1572
1573 static int
1574 matroska_parse_tracks (MatroskaDemuxContext *matroska)
1575 {
1576     int res = 0;
1577     uint32_t id;
1578
1579     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1580
1581     while (res == 0) {
1582         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1583             res = AVERROR_IO;
1584             break;
1585         } else if (matroska->level_up) {
1586             matroska->level_up--;
1587             break;
1588         }
1589
1590         switch (id) {
1591             /* one track within the "all-tracks" header */
1592             case MATROSKA_ID_TRACKENTRY:
1593                 res = matroska_add_stream(matroska);
1594                 break;
1595
1596             default:
1597                 av_log(matroska->ctx, AV_LOG_INFO,
1598                        "Unknown entry 0x%x in track header\n", id);
1599                 /* fall-through */
1600
1601             case EBML_ID_VOID:
1602                 res = ebml_read_skip(matroska);
1603                 break;
1604         }
1605
1606         if (matroska->level_up) {
1607             matroska->level_up--;
1608             break;
1609         }
1610     }
1611
1612     return res;
1613 }
1614
1615 static int
1616 matroska_parse_index (MatroskaDemuxContext *matroska)
1617 {
1618     int res = 0;
1619     uint32_t id;
1620     MatroskaDemuxIndex idx;
1621
1622     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1623
1624     while (res == 0) {
1625         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1626             res = AVERROR_IO;
1627             break;
1628         } else if (matroska->level_up) {
1629             matroska->level_up--;
1630             break;
1631         }
1632
1633         switch (id) {
1634             /* one single index entry ('point') */
1635             case MATROSKA_ID_POINTENTRY:
1636                 if ((res = ebml_read_master(matroska, &id)) < 0)
1637                     break;
1638
1639                 /* in the end, we hope to fill one entry with a
1640                  * timestamp, a file position and a tracknum */
1641                 idx.pos   = (uint64_t) -1;
1642                 idx.time  = (uint64_t) -1;
1643                 idx.track = (uint16_t) -1;
1644
1645                 while (res == 0) {
1646                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1647                         res = AVERROR_IO;
1648                         break;
1649                     } else if (matroska->level_up) {
1650                         matroska->level_up--;
1651                         break;
1652                     }
1653
1654                     switch (id) {
1655                         /* one single index entry ('point') */
1656                         case MATROSKA_ID_CUETIME: {
1657                             int64_t time;
1658                             if ((res = ebml_read_uint(matroska, &id,
1659                                                       &time)) < 0)
1660                                 break;
1661                             idx.time = time * matroska->time_scale;
1662                             break;
1663                         }
1664
1665                         /* position in the file + track to which it
1666                          * belongs */
1667                         case MATROSKA_ID_CUETRACKPOSITION:
1668                             if ((res = ebml_read_master(matroska, &id)) < 0)
1669                                 break;
1670
1671                             while (res == 0) {
1672                                 if (!(id = ebml_peek_id (matroska,
1673                                                     &matroska->level_up))) {
1674                                     res = AVERROR_IO;
1675                                     break;
1676                                 } else if (matroska->level_up) {
1677                                     matroska->level_up--;
1678                                     break;
1679                                 }
1680
1681                                 switch (id) {
1682                                     /* track number */
1683                                     case MATROSKA_ID_CUETRACK: {
1684                                         uint64_t num;
1685                                         if ((res = ebml_read_uint(matroska,
1686                                                           &id, &num)) < 0)
1687                                             break;
1688                                         idx.track = num;
1689                                         break;
1690                                     }
1691
1692                                         /* position in file */
1693                                     case MATROSKA_ID_CUECLUSTERPOSITION: {
1694                                         uint64_t num;
1695                                         if ((res = ebml_read_uint(matroska,
1696                                                           &id, &num)) < 0)
1697                                             break;
1698                                         idx.pos = num;
1699                                         break;
1700                                     }
1701
1702                                     default:
1703                                         av_log(matroska->ctx, AV_LOG_INFO,
1704                                                "Unknown entry 0x%x in "
1705                                                "CuesTrackPositions\n", id);
1706                                         /* fall-through */
1707
1708                                     case EBML_ID_VOID:
1709                                         res = ebml_read_skip(matroska);
1710                                         break;
1711                                 }
1712
1713                                 if (matroska->level_up) {
1714                                     matroska->level_up--;
1715                                     break;
1716                                 }
1717                             }
1718
1719                             break;
1720
1721                         default:
1722                             av_log(matroska->ctx, AV_LOG_INFO,
1723                                    "Unknown entry 0x%x in cuespoint "
1724                                    "index\n", id);
1725                             /* fall-through */
1726
1727                         case EBML_ID_VOID:
1728                             res = ebml_read_skip(matroska);
1729                             break;
1730                     }
1731
1732                     if (matroska->level_up) {
1733                         matroska->level_up--;
1734                         break;
1735                     }
1736                 }
1737
1738                 /* so let's see if we got what we wanted */
1739                 if (idx.pos   != (uint64_t) -1 &&
1740                     idx.time  != (uint64_t) -1 &&
1741                     idx.track != (uint16_t) -1) {
1742                     if (matroska->num_indexes % 32 == 0) {
1743                         /* re-allocate bigger index */
1744                         matroska->index =
1745                             av_realloc(matroska->index,
1746                                        (matroska->num_indexes + 32) *
1747                                        sizeof(MatroskaDemuxIndex));
1748                     }
1749                     matroska->index[matroska->num_indexes] = idx;
1750                     matroska->num_indexes++;
1751                 }
1752                 break;
1753
1754             default:
1755                 av_log(matroska->ctx, AV_LOG_INFO,
1756                        "Unknown entry 0x%x in cues header\n", id);
1757                 /* fall-through */
1758
1759             case EBML_ID_VOID:
1760                 res = ebml_read_skip(matroska);
1761                 break;
1762         }
1763
1764         if (matroska->level_up) {
1765             matroska->level_up--;
1766             break;
1767         }
1768     }
1769
1770     return res;
1771 }
1772
1773 static int
1774 matroska_parse_metadata (MatroskaDemuxContext *matroska)
1775 {
1776     int res = 0;
1777     uint32_t id;
1778
1779     while (res == 0) {
1780         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1781             res = AVERROR_IO;
1782             break;
1783         } else if (matroska->level_up) {
1784             matroska->level_up--;
1785             break;
1786         }
1787
1788         switch (id) {
1789             /* Hm, this is unsupported... */
1790             default:
1791                 av_log(matroska->ctx, AV_LOG_INFO,
1792                        "Unknown entry 0x%x in metadata header\n", id);
1793                 /* fall-through */
1794
1795             case EBML_ID_VOID:
1796                 res = ebml_read_skip(matroska);
1797                 break;
1798         }
1799
1800         if (matroska->level_up) {
1801             matroska->level_up--;
1802             break;
1803         }
1804     }
1805
1806     return res;
1807 }
1808
1809 static int
1810 matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1811 {
1812     int res = 0;
1813     uint32_t id;
1814
1815     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1816
1817     while (res == 0) {
1818         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1819             res = AVERROR_IO;
1820             break;
1821         } else if (matroska->level_up) {
1822             matroska->level_up--;
1823             break;
1824         }
1825
1826         switch (id) {
1827             case MATROSKA_ID_SEEKENTRY: {
1828                 uint32_t seek_id = 0, peek_id_cache = 0;
1829                 uint64_t seek_pos = (uint64_t) -1, t;
1830
1831                 if ((res = ebml_read_master(matroska, &id)) < 0)
1832                     break;
1833
1834                 while (res == 0) {
1835                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1836                         res = AVERROR_IO;
1837                         break;
1838                     } else if (matroska->level_up) {
1839                         matroska->level_up--;
1840                         break;
1841                     }
1842
1843                     switch (id) {
1844                         case MATROSKA_ID_SEEKID:
1845                             res = ebml_read_uint(matroska, &id, &t);
1846                             seek_id = t;
1847                             break;
1848
1849                         case MATROSKA_ID_SEEKPOSITION:
1850                             res = ebml_read_uint(matroska, &id, &seek_pos);
1851                             break;
1852
1853                         default:
1854                             av_log(matroska->ctx, AV_LOG_INFO,
1855                                    "Unknown seekhead ID 0x%x\n", id);
1856                             /* fall-through */
1857
1858                         case EBML_ID_VOID:
1859                             res = ebml_read_skip(matroska);
1860                             break;
1861                     }
1862
1863                     if (matroska->level_up) {
1864                         matroska->level_up--;
1865                         break;
1866                     }
1867                 }
1868
1869                 if (!seek_id || seek_pos == (uint64_t) -1) {
1870                     av_log(matroska->ctx, AV_LOG_INFO,
1871                            "Incomplete seekhead entry (0x%x/%"PRIu64")\n",
1872                            seek_id, seek_pos);
1873                     break;
1874                 }
1875
1876                 switch (seek_id) {
1877                     case MATROSKA_ID_CUES:
1878                     case MATROSKA_ID_TAGS: {
1879                         uint32_t level_up = matroska->level_up;
1880                         offset_t before_pos;
1881                         uint64_t length;
1882                         MatroskaLevel level;
1883
1884                         /* remember the peeked ID and the current position */
1885                         peek_id_cache = matroska->peek_id;
1886                         before_pos = url_ftell(&matroska->ctx->pb);
1887
1888                         /* seek */
1889                         if ((res = ebml_read_seek(matroska, seek_pos +
1890                                                matroska->segment_start)) < 0)
1891                             return res;
1892
1893                         /* we don't want to lose our seekhead level, so we add
1894                          * a dummy. This is a crude hack. */
1895                         if (matroska->num_levels == EBML_MAX_DEPTH) {
1896                             av_log(matroska->ctx, AV_LOG_INFO,
1897                                    "Max EBML element depth (%d) reached, "
1898                                    "cannot parse further.\n", EBML_MAX_DEPTH);
1899                             return AVERROR_UNKNOWN;
1900                         }
1901
1902                         level.start = 0;
1903                         level.length = (uint64_t)-1;
1904                         matroska->levels[matroska->num_levels] = level;
1905                         matroska->num_levels++;
1906
1907                         /* check ID */
1908                         if (!(id = ebml_peek_id (matroska,
1909                                                  &matroska->level_up)))
1910                             break;
1911                         if (id != seek_id) {
1912                             av_log(matroska->ctx, AV_LOG_INFO,
1913                                    "We looked for ID=0x%x but got "
1914                                    "ID=0x%x (pos=%"PRIu64")",
1915                                    seek_id, id, seek_pos +
1916                                    matroska->segment_start);
1917                             goto finish;
1918                         }
1919
1920                         /* read master + parse */
1921                         if ((res = ebml_read_master(matroska, &id)) < 0)
1922                             break;
1923                         switch (id) {
1924                             case MATROSKA_ID_CUES:
1925                                 if (!(res = matroska_parse_index(matroska)) ||
1926                                     url_feof(&matroska->ctx->pb)) {
1927                                     matroska->index_parsed = 1;
1928                                     res = 0;
1929                                 }
1930                                 break;
1931                             case MATROSKA_ID_TAGS:
1932                                 if (!(res = matroska_parse_metadata(matroska)) ||
1933                                    url_feof(&matroska->ctx->pb)) {
1934                                     matroska->metadata_parsed = 1;
1935                                     res = 0;
1936                                 }
1937                                 break;
1938                         }
1939                         if (res < 0)
1940                             break;
1941
1942                     finish:
1943                         /* remove dummy level */
1944                         while (matroska->num_levels) {
1945                             matroska->num_levels--;
1946                             length =
1947                                 matroska->levels[matroska->num_levels].length;
1948                             if (length == (uint64_t)-1)
1949                                 break;
1950                         }
1951
1952                         /* seek back */
1953                         if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1954                             return res;
1955                         matroska->peek_id = peek_id_cache;
1956                         matroska->level_up = level_up;
1957                         break;
1958                     }
1959
1960                     default:
1961                         av_log(matroska->ctx, AV_LOG_INFO,
1962                                "Ignoring seekhead entry for ID=0x%x\n",
1963                                seek_id);
1964                         break;
1965                 }
1966
1967                 break;
1968             }
1969
1970             default:
1971                 av_log(matroska->ctx, AV_LOG_INFO,
1972                        "Unknown seekhead ID 0x%x\n", id);
1973                 /* fall-through */
1974
1975             case EBML_ID_VOID:
1976                 res = ebml_read_skip(matroska);
1977                 break;
1978         }
1979
1980         if (matroska->level_up) {
1981             matroska->level_up--;
1982             break;
1983         }
1984     }
1985
1986     return res;
1987 }
1988
1989 static int
1990 matroska_read_header (AVFormatContext    *s,
1991                       AVFormatParameters *ap)
1992 {
1993     MatroskaDemuxContext *matroska = s->priv_data;
1994     char *doctype;
1995     int version, last_level, res = 0;
1996     uint32_t id;
1997
1998     matroska->ctx = s;
1999
2000     /* First read the EBML header. */
2001     doctype = NULL;
2002     if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2003         return res;
2004     if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2005         av_log(matroska->ctx, AV_LOG_ERROR,
2006                "Wrong EBML doctype ('%s' != 'matroska').\n",
2007                doctype ? doctype : "(none)");
2008         if (doctype)
2009             av_free(doctype);
2010         return AVERROR_NOFMT;
2011     }
2012     av_free(doctype);
2013     if (version != 1) {
2014         av_log(matroska->ctx, AV_LOG_ERROR,
2015                "Matroska demuxer version 1 too old for file version %d\n",
2016                version);
2017         return AVERROR_NOFMT;
2018     }
2019
2020     /* The next thing is a segment. */
2021     while (1) {
2022         if (!(id = ebml_peek_id(matroska, &last_level)))
2023             return AVERROR_IO;
2024         if (id == MATROSKA_ID_SEGMENT)
2025             break;
2026
2027         /* oi! */
2028         av_log(matroska->ctx, AV_LOG_INFO,
2029                "Expected a Segment ID (0x%x), but received 0x%x!\n",
2030                MATROSKA_ID_SEGMENT, id);
2031         if ((res = ebml_read_skip(matroska)) < 0)
2032             return res;
2033     }
2034
2035     /* We now have a Matroska segment.
2036      * Seeks are from the beginning of the segment,
2037      * after the segment ID/length. */
2038     if ((res = ebml_read_master(matroska, &id)) < 0)
2039         return res;
2040     matroska->segment_start = url_ftell(&s->pb);
2041
2042     matroska->time_scale = 1000000;
2043     /* we've found our segment, start reading the different contents in here */
2044     while (res == 0) {
2045         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2046             res = AVERROR_IO;
2047             break;
2048         } else if (matroska->level_up) {
2049             matroska->level_up--;
2050             break;
2051         }
2052
2053         switch (id) {
2054             /* stream info */
2055             case MATROSKA_ID_INFO: {
2056                 if ((res = ebml_read_master(matroska, &id)) < 0)
2057                     break;
2058                 res = matroska_parse_info(matroska);
2059                 break;
2060             }
2061
2062             /* track info headers */
2063             case MATROSKA_ID_TRACKS: {
2064                 if ((res = ebml_read_master(matroska, &id)) < 0)
2065                     break;
2066                 res = matroska_parse_tracks(matroska);
2067                 break;
2068             }
2069
2070             /* stream index */
2071             case MATROSKA_ID_CUES: {
2072                 if (!matroska->index_parsed) {
2073                     if ((res = ebml_read_master(matroska, &id)) < 0)
2074                         break;
2075                     res = matroska_parse_index(matroska);
2076                 } else
2077                     res = ebml_read_skip(matroska);
2078                 break;
2079             }
2080
2081             /* metadata */
2082             case MATROSKA_ID_TAGS: {
2083                 if (!matroska->metadata_parsed) {
2084                     if ((res = ebml_read_master(matroska, &id)) < 0)
2085                         break;
2086                     res = matroska_parse_metadata(matroska);
2087                 } else
2088                     res = ebml_read_skip(matroska);
2089                 break;
2090             }
2091
2092             /* file index (if seekable, seek to Cues/Tags to parse it) */
2093             case MATROSKA_ID_SEEKHEAD: {
2094                 if ((res = ebml_read_master(matroska, &id)) < 0)
2095                     break;
2096                 res = matroska_parse_seekhead(matroska);
2097                 break;
2098             }
2099
2100             case MATROSKA_ID_CLUSTER: {
2101                 /* Do not read the master - this will be done in the next
2102                  * call to matroska_read_packet. */
2103                 res = 1;
2104                 break;
2105             }
2106
2107             default:
2108                 av_log(matroska->ctx, AV_LOG_INFO,
2109                        "Unknown matroska file header ID 0x%x\n", id);
2110             /* fall-through */
2111
2112             case EBML_ID_VOID:
2113                 res = ebml_read_skip(matroska);
2114                 break;
2115         }
2116
2117         if (matroska->level_up) {
2118             matroska->level_up--;
2119             break;
2120         }
2121     }
2122
2123     if (res < 0)
2124         return res;
2125
2126     /* Have we found a cluster? */
2127     if (res == 1) {
2128         int i;
2129         enum CodecID codec_id;
2130         MatroskaTrack *track;
2131         AVStream *st;
2132         void *extradata = NULL;
2133         int extradata_size = 0;
2134
2135         for (i = 0; i < matroska->num_tracks; i++) {
2136             track = matroska->tracks[i];
2137
2138             /* libavformat does not really support subtitles.
2139              * Also apply some sanity checks. */
2140             if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
2141                 (track->codec_id == NULL))
2142                 continue;
2143
2144             /* Set the FourCC from the CodecID. */
2145             /* This is the MS compatibility mode which stores a
2146              * BITMAPINFOHEADER in the CodecPrivate. */
2147             if (!strcmp(track->codec_id,
2148                         MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2149                 (track->codec_priv_size >= 40) &&
2150                 (track->codec_priv != NULL)) {
2151                 unsigned char *p;
2152
2153                 /* Offset of biCompression. Stored in LE. */
2154                 p = (unsigned char *)track->codec_priv + 16;
2155                 ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
2156                                  (p[2] << 16) | (p[1] << 8) | p[0];
2157                 codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
2158
2159             } else if (!strcmp(track->codec_id,
2160                                MATROSKA_CODEC_ID_VIDEO_MPEG4_SP) ||
2161                        !strcmp(track->codec_id,
2162                                MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP) ||
2163                        !strcmp(track->codec_id,
2164                                MATROSKA_CODEC_ID_VIDEO_MPEG4_AP))
2165                 codec_id = CODEC_ID_MPEG4;
2166             else if (!strcmp(track->codec_id,
2167                              MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC))
2168                 codec_id = CODEC_ID_H264;
2169 /*             else if (!strcmp(track->codec_id, */
2170 /*                              MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
2171 /*                 codec_id = CODEC_ID_???; */
2172             else if (!strcmp(track->codec_id,
2173                              MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3))
2174                 codec_id = CODEC_ID_MSMPEG4V3;
2175             else if (!strcmp(track->codec_id,
2176                              MATROSKA_CODEC_ID_VIDEO_MPEG1) ||
2177                      !strcmp(track->codec_id,
2178                              MATROSKA_CODEC_ID_VIDEO_MPEG2))
2179                 codec_id = CODEC_ID_MPEG2VIDEO;
2180
2181             /* This is the MS compatibility mode which stores a
2182              * WAVEFORMATEX in the CodecPrivate. */
2183             else if (!strcmp(track->codec_id,
2184                              MATROSKA_CODEC_ID_AUDIO_ACM) &&
2185                 (track->codec_priv_size >= 18) &&
2186                 (track->codec_priv != NULL)) {
2187                 unsigned char *p;
2188                 uint16_t tag;
2189
2190                 /* Offset of wFormatTag. Stored in LE. */
2191                 p = (unsigned char *)track->codec_priv;
2192                 tag = (p[1] << 8) | p[0];
2193                 codec_id = codec_get_wav_id(tag);
2194
2195             } else if (!strcmp(track->codec_id,
2196                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L1) ||
2197                        !strcmp(track->codec_id,
2198                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L2) ||
2199                        !strcmp(track->codec_id,
2200                                MATROSKA_CODEC_ID_AUDIO_MPEG1_L3))
2201                 codec_id = CODEC_ID_MP3;
2202             else if (!strcmp(track->codec_id,
2203                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE))
2204                 codec_id = CODEC_ID_PCM_U16BE;
2205             else if (!strcmp(track->codec_id,
2206                              MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE))
2207                 codec_id = CODEC_ID_PCM_U16LE;
2208 /*             else if (!strcmp(track->codec_id, */
2209 /*                              MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
2210 /*                 codec_id = CODEC_ID_PCM_???; */
2211             else if (!strcmp(track->codec_id,
2212                              MATROSKA_CODEC_ID_AUDIO_AC3))
2213                 codec_id = CODEC_ID_AC3;
2214             else if (!strcmp(track->codec_id,
2215                              MATROSKA_CODEC_ID_AUDIO_DTS))
2216                 codec_id = CODEC_ID_DTS;
2217             /* No such codec id so far. */
2218 /*             else if (!strcmp(track->codec_id, */
2219 /*                              MATROSKA_CODEC_ID_AUDIO_DTS)) */
2220 /*                 codec_id = CODEC_ID_DTS; */
2221             else if (!strcmp(track->codec_id,
2222                              MATROSKA_CODEC_ID_AUDIO_VORBIS)) {
2223                 extradata_size = track->codec_priv_size;
2224                 if(extradata_size) {
2225                     extradata = av_malloc(extradata_size);
2226                     if(extradata == NULL)
2227                         return AVERROR_NOMEM;
2228                     memcpy(extradata, track->codec_priv, extradata_size);
2229                 }
2230                 codec_id = CODEC_ID_VORBIS;
2231             } else if (!strcmp(track->codec_id,
2232                                MATROSKA_CODEC_ID_AUDIO_MPEG2) ||
2233                        !strcmp(track->codec_id,
2234                                MATROSKA_CODEC_ID_AUDIO_MPEG4))
2235                 codec_id = CODEC_ID_AAC;
2236             else
2237                 codec_id = CODEC_ID_NONE;
2238
2239             if (codec_id == CODEC_ID_NONE) {
2240                 av_log(matroska->ctx, AV_LOG_INFO,
2241                        "Unknown/unsupported CodecID %s.\n",
2242                        track->codec_id);
2243             }
2244
2245             track->stream_index = matroska->num_streams;
2246
2247             matroska->num_streams++;
2248             st = av_new_stream(s, track->stream_index);
2249             if (st == NULL)
2250                 return AVERROR_NOMEM;
2251             av_set_pts_info(st, 24, 1, 1000); /* 24 bit pts in ms */
2252
2253             st->codec->codec_id = codec_id;
2254
2255             if(extradata){
2256                 st->codec->extradata = extradata;
2257                 st->codec->extradata_size = extradata_size;
2258             } else if(track->codec_priv && track->codec_priv_size > 0){
2259                 st->codec->extradata = av_malloc(track->codec_priv_size);
2260                 if(st->codec->extradata == NULL)
2261                     return AVERROR_NOMEM;
2262                 st->codec->extradata_size = track->codec_priv_size;
2263                 memcpy(st->codec->extradata, track->codec_priv,
2264                        track->codec_priv_size);
2265             }
2266
2267             if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2268                 MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2269
2270                 st->codec->codec_type = CODEC_TYPE_VIDEO;
2271                 st->codec->codec_tag = videotrack->fourcc;
2272                 st->codec->width = videotrack->pixel_width;
2273                 st->codec->height = videotrack->pixel_height;
2274                 if (videotrack->display_width == 0)
2275                     st->codec->sample_aspect_ratio.num =
2276                         videotrack->pixel_width;
2277                 else
2278                     st->codec->sample_aspect_ratio.num =
2279                         videotrack->display_width;
2280                 if (videotrack->display_height == 0)
2281                     st->codec->sample_aspect_ratio.num =
2282                         videotrack->pixel_height;
2283                 else
2284                     st->codec->sample_aspect_ratio.num =
2285                         videotrack->display_height;
2286
2287             } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2288                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2289
2290                 st->codec->codec_type = CODEC_TYPE_AUDIO;
2291                 st->codec->sample_rate = audiotrack->samplerate;
2292                 st->codec->channels = audiotrack->channels;
2293             }
2294
2295             /* What do we do with private data? E.g. for Vorbis. */
2296         }
2297     }
2298
2299     return 0;
2300 }
2301
2302 static int
2303 matroska_find_track_by_num (MatroskaDemuxContext *matroska,
2304                             int                   num)
2305 {
2306     int i;
2307
2308     for (i = 0; i < matroska->num_tracks; i++)
2309         if (matroska->tracks[i]->num == num)
2310             return i;
2311
2312     return -1;
2313 }
2314
2315 static int
2316 matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2317                            uint64_t              cluster_time)
2318 {
2319     int res = 0;
2320     uint32_t id;
2321     AVPacket *pkt;
2322     int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2323
2324     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2325
2326     while (res == 0) {
2327         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2328             res = AVERROR_IO;
2329             break;
2330         } else if (matroska->level_up) {
2331             matroska->level_up--;
2332             break;
2333         }
2334
2335         switch (id) {
2336             /* one block inside the group. Note, block parsing is one
2337              * of the harder things, so this code is a bit complicated.
2338              * See http://www.matroska.org/ for documentation. */
2339             case MATROSKA_ID_BLOCK: {
2340                 uint8_t *data, *origdata;
2341                 int size;
2342                 uint64_t time;
2343                 uint32_t *lace_size = NULL;
2344                 int n, track, flags, laces = 0;
2345                 uint64_t num;
2346                 int64_t pos= url_ftell(&matroska->ctx->pb);
2347
2348                 if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
2349                     break;
2350                 origdata = data;
2351
2352                 /* first byte(s): blocknum */
2353                 if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2354                     av_log(matroska->ctx, AV_LOG_ERROR,
2355                            "EBML block data error\n");
2356                     av_free(origdata);
2357                     break;
2358                 }
2359                 data += n;
2360                 size -= n;
2361
2362                 /* fetch track from num */
2363                 track = matroska_find_track_by_num(matroska, num);
2364                 if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2365                     av_log(matroska->ctx, AV_LOG_INFO,
2366                            "Invalid stream %d or size %u\n", track, size);
2367                     av_free(origdata);
2368                     break;
2369                 }
2370                 if(matroska->ctx->streams[ matroska->tracks[track]->stream_index ]->discard >= AVDISCARD_ALL){
2371                     av_free(origdata);
2372                     break;
2373                 }
2374
2375                 /* time (relative to cluster time) */
2376                 time = ((data[0] << 8) | data[1]) * matroska->time_scale;
2377                 data += 2;
2378                 size -= 2;
2379                 flags = *data;
2380                 data += 1;
2381                 size -= 1;
2382                 switch ((flags & 0x06) >> 1) {
2383                     case 0x0: /* no lacing */
2384                         laces = 1;
2385                         lace_size = av_mallocz(sizeof(int));
2386                         lace_size[0] = size;
2387                         break;
2388
2389                     case 0x1: /* xiph lacing */
2390                     case 0x2: /* fixed-size lacing */
2391                     case 0x3: /* EBML lacing */
2392                         if (size == 0) {
2393                             res = -1;
2394                             break;
2395                         }
2396                         laces = (*data) + 1;
2397                         data += 1;
2398                         size -= 1;
2399                         lace_size = av_mallocz(laces * sizeof(int));
2400
2401                         switch ((flags & 0x06) >> 1) {
2402                             case 0x1: /* xiph lacing */ {
2403                                 uint8_t temp;
2404                                 uint32_t total = 0;
2405                                 for (n = 0; res == 0 && n < laces - 1; n++) {
2406                                     while (1) {
2407                                         if (size == 0) {
2408                                             res = -1;
2409                                             break;
2410                                         }
2411                                         temp = *data;
2412                                         lace_size[n] += temp;
2413                                         data += 1;
2414                                         size -= 1;
2415                                         if (temp != 0xff)
2416                                             break;
2417                                     }
2418                                     total += lace_size[n];
2419                                 }
2420                                 lace_size[n] = size - total;
2421                                 break;
2422                             }
2423
2424                             case 0x2: /* fixed-size lacing */
2425                                 for (n = 0; n < laces; n++)
2426                                     lace_size[n] = size / laces;
2427                                 break;
2428
2429                             case 0x3: /* EBML lacing */ {
2430                                 uint32_t total;
2431                                 n = matroska_ebmlnum_uint(data, size, &num);
2432                                 if (n < 0) {
2433                                     av_log(matroska->ctx, AV_LOG_INFO,
2434                                            "EBML block data error\n");
2435                                     break;
2436                                 }
2437                                 data += n;
2438                                 size -= n;
2439                                 total = lace_size[0] = num;
2440                                 for (n = 1; res == 0 && n < laces - 1; n++) {
2441                                     int64_t snum;
2442                                     int r;
2443                                     r = matroska_ebmlnum_sint (data, size,
2444                                                                &snum);
2445                                     if (r < 0) {
2446                                         av_log(matroska->ctx, AV_LOG_INFO,
2447                                                "EBML block data error\n");
2448                                         break;
2449                                     }
2450                                     data += r;
2451                                     size -= r;
2452                                     lace_size[n] = lace_size[n - 1] + snum;
2453                                     total += lace_size[n];
2454                                 }
2455                                 lace_size[n] = size - total;
2456                                 break;
2457                             }
2458                         }
2459                         break;
2460                 }
2461
2462                 if (res == 0) {
2463                     for (n = 0; n < laces; n++) {
2464                         uint64_t timecode = 0;
2465
2466                         pkt = av_mallocz(sizeof(AVPacket));
2467                         /* XXX: prevent data copy... */
2468                         if (av_new_packet(pkt,lace_size[n]) < 0) {
2469                             res = AVERROR_NOMEM;
2470                             break;
2471                         }
2472                         if (cluster_time != (uint64_t)-1) {
2473                             if (time < 0 && (-time) > cluster_time)
2474                                 timecode = cluster_time;
2475                             else
2476                                 timecode = cluster_time + time;
2477                         }
2478                         /* FIXME: duration */
2479
2480                         memcpy(pkt->data, data, lace_size[n]);
2481                         data += lace_size[n];
2482                         if (n == 0)
2483                             pkt->flags = is_keyframe;
2484                         pkt->stream_index =
2485                             matroska->tracks[track]->stream_index;
2486
2487                         pkt->pts = timecode / 1000000; /* ns to ms */
2488                         pkt->pos= pos;
2489
2490                         matroska_queue_packet(matroska, pkt);
2491                     }
2492                 }
2493
2494                 av_free(lace_size);
2495                 av_free(origdata);
2496                 break;
2497             }
2498
2499             case MATROSKA_ID_BLOCKDURATION: {
2500                 uint64_t num;
2501                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2502                     break;
2503                 av_log(matroska->ctx, AV_LOG_INFO,
2504                        "FIXME: implement support for BlockDuration\n");
2505                 break;
2506             }
2507
2508             case MATROSKA_ID_BLOCKREFERENCE:
2509                 /* We've found a reference, so not even the first frame in
2510                  * the lace is a key frame. */
2511                 is_keyframe = 0;
2512                 if (last_num_packets != matroska->num_packets)
2513                     matroska->packets[last_num_packets]->flags = 0;
2514                 res = ebml_read_skip(matroska);
2515                 break;
2516
2517             default:
2518                 av_log(matroska->ctx, AV_LOG_INFO,
2519                        "Unknown entry 0x%x in blockgroup data\n", id);
2520                 /* fall-through */
2521
2522             case EBML_ID_VOID:
2523                 res = ebml_read_skip(matroska);
2524                 break;
2525         }
2526
2527         if (matroska->level_up) {
2528             matroska->level_up--;
2529             break;
2530         }
2531     }
2532
2533     return res;
2534 }
2535
2536 static int
2537 matroska_parse_cluster (MatroskaDemuxContext *matroska)
2538 {
2539     int res = 0;
2540     uint32_t id;
2541     uint64_t cluster_time = 0;
2542
2543     av_log(matroska->ctx, AV_LOG_DEBUG,
2544            "parsing cluster at %"PRId64"\n", url_ftell(&matroska->ctx->pb));
2545
2546     while (res == 0) {
2547         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2548             res = AVERROR_IO;
2549             break;
2550         } else if (matroska->level_up) {
2551             matroska->level_up--;
2552             break;
2553         }
2554
2555         switch (id) {
2556             /* cluster timecode */
2557             case MATROSKA_ID_CLUSTERTIMECODE: {
2558                 uint64_t num;
2559                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2560                     break;
2561                 cluster_time = num * matroska->time_scale;
2562                 break;
2563             }
2564
2565                 /* a group of blocks inside a cluster */
2566             case MATROSKA_ID_BLOCKGROUP:
2567                 if ((res = ebml_read_master(matroska, &id)) < 0)
2568                     break;
2569                 res = matroska_parse_blockgroup(matroska, cluster_time);
2570                 break;
2571
2572             default:
2573                 av_log(matroska->ctx, AV_LOG_INFO,
2574                        "Unknown entry 0x%x in cluster data\n", id);
2575                 /* fall-through */
2576
2577             case EBML_ID_VOID:
2578                 res = ebml_read_skip(matroska);
2579                 break;
2580         }
2581
2582         if (matroska->level_up) {
2583             matroska->level_up--;
2584             break;
2585         }
2586     }
2587
2588     return res;
2589 }
2590
2591 static int
2592 matroska_read_packet (AVFormatContext *s,
2593                       AVPacket        *pkt)
2594 {
2595     MatroskaDemuxContext *matroska = s->priv_data;
2596     int res = 0;
2597     uint32_t id;
2598
2599     /* Do we still have a packet queued? */
2600     if (matroska_deliver_packet(matroska, pkt) == 0)
2601         return 0;
2602
2603     /* Have we already reached the end? */
2604     if (matroska->done)
2605         return AVERROR_IO;
2606
2607     while (res == 0) {
2608         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2609             res = AVERROR_IO;
2610             break;
2611         } else if (matroska->level_up) {
2612             matroska->level_up--;
2613             break;
2614         }
2615
2616         switch (id) {
2617             case MATROSKA_ID_CLUSTER:
2618                 if ((res = ebml_read_master(matroska, &id)) < 0)
2619                     break;
2620                 if ((res = matroska_parse_cluster(matroska)) == 0)
2621                     res = 1; /* Parsed one cluster, let's get out. */
2622                 break;
2623
2624             default:
2625             case EBML_ID_VOID:
2626                 res = ebml_read_skip(matroska);
2627                 break;
2628         }
2629
2630         if (matroska->level_up) {
2631             matroska->level_up--;
2632             break;
2633         }
2634     }
2635
2636     if (res == -1)
2637         matroska->done = 1;
2638
2639     return matroska_deliver_packet(matroska, pkt);
2640 }
2641
2642 static int
2643 matroska_read_close (AVFormatContext *s)
2644 {
2645     MatroskaDemuxContext *matroska = s->priv_data;
2646     int n = 0;
2647
2648     if (matroska->writing_app)
2649         av_free(matroska->writing_app);
2650     if (matroska->muxing_app)
2651         av_free(matroska->muxing_app);
2652     if (matroska->index)
2653         av_free(matroska->index);
2654
2655     if (matroska->packets != NULL) {
2656         for (n = 0; n < matroska->num_packets; n++) {
2657             av_free_packet(matroska->packets[n]);
2658             av_free(matroska->packets[n]);
2659         }
2660         av_free(matroska->packets);
2661     }
2662
2663     for (n = 0; n < matroska->num_tracks; n++) {
2664         MatroskaTrack *track = matroska->tracks[n];
2665         if (track->codec_id)
2666             av_free(track->codec_id);
2667         if (track->codec_name)
2668             av_free(track->codec_name);
2669         if (track->codec_priv)
2670             av_free(track->codec_priv);
2671         if (track->name)
2672             av_free(track->name);
2673         if (track->language)
2674             av_free(track->language);
2675
2676         av_free(track);
2677     }
2678
2679     for (n = 0; n < s->nb_streams; n++) {
2680         av_free(s->streams[n]->codec->extradata);
2681     }
2682
2683     memset(matroska, 0, sizeof(MatroskaDemuxContext));
2684
2685     return 0;
2686 }
2687
/* Input-format descriptor that matroska_init() passes to
 * av_register_input_format().  The initializer is positional, so the
 * entries must stay in the member order of AVInputFormat, which is declared
 * outside this file.  The roles noted below are read off the values
 * themselves - confirm them against that declaration. */
2688 static AVInputFormat matroska_iformat = {
2689     "matroska",                   /* short identifier string */
2690     "Matroska file format",       /* human-readable description */
2691     sizeof(MatroskaDemuxContext), /* size of the type the callbacks read from s->priv_data */
2692     matroska_probe,               /* presumably the probe callback */
2693     matroska_read_header,         /* presumably the header-reading callback */
2694     matroska_read_packet,         /* packet-reading function defined above */
2695     matroska_read_close,          /* cleanup function defined above */
2696 };
2697
2698 int
2699 matroska_init(void)
2700 {
2701     av_register_input_format(&matroska_iformat);
2702     return 0;
2703 }