]> git.sesse.net Git - ffmpeg/blob - libavformat/matroskadec.c
Add backslash '\' support to the parser
[ffmpeg] / libavformat / matroskadec.c
1 /*
2  * Matroska file demuxer (no muxer yet)
3  * Copyright (c) 2003-2004 The ffmpeg Project
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file matroskadec.c
24  * Matroska file demuxer
25  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
26  * with a little help from Moritz Bunkus <moritz@bunkus.org>
27  * Specs available on the matroska project page:
28  * http://www.matroska.org/.
29  */
30
31 #include "avformat.h"
32 /* For codec_get_id(). */
33 #include "riff.h"
34 #include "intfloat_readwrite.h"
35 #include "matroska.h"
36 #include "libavcodec/mpeg4audio.h"
37
38 typedef struct Track {
39     MatroskaTrackType type;
40
41     /* Unique track number and track ID. stream_index is the index that
42      * the calling app uses for this track. */
43     uint32_t num;
44     uint32_t uid;
45     int stream_index;
46
47     char *name;
48     char language[4];
49
50     char *codec_id;
51     char *codec_name;
52
53     unsigned char *codec_priv;
54     int codec_priv_size;
55
56     uint64_t default_duration;
57     MatroskaTrackFlags flags;
58 } MatroskaTrack;
59
60 typedef struct MatroskaVideoTrack {
61     MatroskaTrack track;
62
63     int pixel_width;
64     int pixel_height;
65     int display_width;
66     int display_height;
67
68     uint32_t fourcc;
69
70     MatroskaAspectRatioMode ar_mode;
71     MatroskaEyeMode eye_mode;
72
73     //..
74 } MatroskaVideoTrack;
75
76 typedef struct MatroskaAudioTrack {
77     MatroskaTrack track;
78
79     int channels;
80     int bitdepth;
81     int internal_samplerate;
82     int samplerate;
83     int block_align;
84
85     /* real audio header */
86     int coded_framesize;
87     int sub_packet_h;
88     int frame_size;
89     int sub_packet_size;
90     int sub_packet_cnt;
91     int pkt_cnt;
92     uint8_t *buf;
93     //..
94 } MatroskaAudioTrack;
95
96 typedef struct MatroskaSubtitleTrack {
97     MatroskaTrack track;
98     //..
99 } MatroskaSubtitleTrack;
100
/* Size in bytes of the largest of the three track structs. */
#define MAX_TRACK_SIZE \
    (FFMAX(sizeof(MatroskaVideoTrack), \
           FFMAX(sizeof(MatroskaAudioTrack), \
                 sizeof(MatroskaSubtitleTrack))))
104
/* One entry of the open-master-element stack: the byte position where
 * the element's content starts and the content length. */
typedef struct MatroskaLevel {
    uint64_t start, length;
} MatroskaLevel;
109
/* One entry of the seek index. */
typedef struct MatroskaDemuxIndex {
    /* position of the corresponding *cluster*! */
    uint64_t pos;
    /* reference to a track's 'num' */
    uint16_t track;
    /* in nanoseconds */
    uint64_t time;
} MatroskaDemuxIndex;
115
116 typedef struct MatroskaDemuxContext {
117     AVFormatContext *ctx;
118
119     /* ebml stuff */
120     int num_levels;
121     MatroskaLevel levels[EBML_MAX_DEPTH];
122     int level_up;
123
124     /* matroska stuff */
125     char *writing_app;
126     char *muxing_app;
127     int64_t created;
128
129     /* timescale in the file */
130     int64_t time_scale;
131
132     /* num_streams is the number of streams that av_new_stream() was called
133      * for ( = that are available to the calling program). */
134     int num_tracks;
135     int num_streams;
136     MatroskaTrack *tracks[MAX_STREAMS];
137
138     /* cache for ID peeking */
139     uint32_t peek_id;
140
141     /* byte position of the segment inside the stream */
142     offset_t segment_start;
143
144     /* The packet queue. */
145     AVPacket **packets;
146     int num_packets;
147
148     /* have we already parse metadata/cues/clusters? */
149     int metadata_parsed;
150     int index_parsed;
151     int done;
152
153     /* The index for seeking. */
154     int num_indexes;
155     MatroskaDemuxIndex *index;
156
157     /* What to skip before effectively reading a packet. */
158     int skip_to_keyframe;
159     AVStream *skip_to_stream;
160 } MatroskaDemuxContext;
161
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. Matroska really is just an
 * EBML file.
 */
167
168 /*
169  * Return: the amount of levels in the hierarchy that the
170  * current element lies higher than the previous one.
171  * The opposite isn't done - that's auto-done using master
172  * element reading.
173  */
174
175 static int
176 ebml_read_element_level_up (MatroskaDemuxContext *matroska)
177 {
178     ByteIOContext *pb = matroska->ctx->pb;
179     offset_t pos = url_ftell(pb);
180     int num = 0;
181
182     while (matroska->num_levels > 0) {
183         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
184
185         if (pos >= level->start + level->length) {
186             matroska->num_levels--;
187             num++;
188         } else {
189             break;
190         }
191     }
192
193     return num;
194 }
195
196 /*
197  * Read: an "EBML number", which is defined as a variable-length
198  * array of bytes. The first byte indicates the length by giving a
199  * number of 0-bits followed by a one. The position of the first
200  * "one" bit inside the first byte indicates the length of this
201  * number.
202  * Returns: num. of bytes read. < 0 on error.
203  */
204
205 static int
206 ebml_read_num (MatroskaDemuxContext *matroska,
207                int                   max_size,
208                uint64_t             *number)
209 {
210     ByteIOContext *pb = matroska->ctx->pb;
211     int len_mask = 0x80, read = 1, n = 1;
212     int64_t total = 0;
213
214     /* the first byte tells us the length in bytes - get_byte() can normally
215      * return 0, but since that's not a valid first ebmlID byte, we can
216      * use it safely here to catch EOS. */
217     if (!(total = get_byte(pb))) {
218         /* we might encounter EOS here */
219         if (!url_feof(pb)) {
220             offset_t pos = url_ftell(pb);
221             av_log(matroska->ctx, AV_LOG_ERROR,
222                    "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
223                    pos, pos);
224         }
225         return AVERROR(EIO); /* EOS or actual I/O error */
226     }
227
228     /* get the length of the EBML number */
229     while (read <= max_size && !(total & len_mask)) {
230         read++;
231         len_mask >>= 1;
232     }
233     if (read > max_size) {
234         offset_t pos = url_ftell(pb) - 1;
235         av_log(matroska->ctx, AV_LOG_ERROR,
236                "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
237                (uint8_t) total, pos, pos);
238         return AVERROR_INVALIDDATA;
239     }
240
241     /* read out length */
242     total &= ~len_mask;
243     while (n++ < read)
244         total = (total << 8) | get_byte(pb);
245
246     *number = total;
247
248     return read;
249 }
250
251 /*
252  * Read: the element content data ID.
253  * Return: the number of bytes read or < 0 on error.
254  */
255
256 static int
257 ebml_read_element_id (MatroskaDemuxContext *matroska,
258                       uint32_t             *id,
259                       int                  *level_up)
260 {
261     int read;
262     uint64_t total;
263
264     /* if we re-call this, use our cached ID */
265     if (matroska->peek_id != 0) {
266         if (level_up)
267             *level_up = 0;
268         *id = matroska->peek_id;
269         return 0;
270     }
271
272     /* read out the "EBML number", include tag in ID */
273     if ((read = ebml_read_num(matroska, 4, &total)) < 0)
274         return read;
275     *id = matroska->peek_id  = total | (1 << (read * 7));
276
277     /* level tracking */
278     if (level_up)
279         *level_up = ebml_read_element_level_up(matroska);
280
281     return read;
282 }
283
284 /*
285  * Read: element content length.
286  * Return: the number of bytes read or < 0 on error.
287  */
288
289 static int
290 ebml_read_element_length (MatroskaDemuxContext *matroska,
291                           uint64_t             *length)
292 {
293     /* clear cache since we're now beyond that data point */
294     matroska->peek_id = 0;
295
296     /* read out the "EBML number", include tag in ID */
297     return ebml_read_num(matroska, 8, length);
298 }
299
300 /*
301  * Return: the ID of the next element, or 0 on error.
302  * Level_up contains the amount of levels that this
303  * next element lies higher than the previous one.
304  */
305
306 static uint32_t
307 ebml_peek_id (MatroskaDemuxContext *matroska,
308               int                  *level_up)
309 {
310     uint32_t id;
311
312     if (ebml_read_element_id(matroska, &id, level_up) < 0)
313         return 0;
314
315     return id;
316 }
317
318 /*
319  * Seek to a given offset.
320  * 0 is success, -1 is failure.
321  */
322
323 static int
324 ebml_read_seek (MatroskaDemuxContext *matroska,
325                 offset_t              offset)
326 {
327     ByteIOContext *pb = matroska->ctx->pb;
328
329     /* clear ID cache, if any */
330     matroska->peek_id = 0;
331
332     return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
333 }
334
335 /*
336  * Skip the next element.
337  * 0 is success, -1 is failure.
338  */
339
340 static int
341 ebml_read_skip (MatroskaDemuxContext *matroska)
342 {
343     ByteIOContext *pb = matroska->ctx->pb;
344     uint32_t id;
345     uint64_t length;
346     int res;
347
348     if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
349         (res = ebml_read_element_length(matroska, &length)) < 0)
350         return res;
351
352     url_fskip(pb, length);
353
354     return 0;
355 }
356
357 /*
358  * Read the next element as an unsigned int.
359  * 0 is success, < 0 is failure.
360  */
361
362 static int
363 ebml_read_uint (MatroskaDemuxContext *matroska,
364                 uint32_t             *id,
365                 uint64_t             *num)
366 {
367     ByteIOContext *pb = matroska->ctx->pb;
368     int n = 0, size, res;
369     uint64_t rlength;
370
371     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
372         (res = ebml_read_element_length(matroska, &rlength)) < 0)
373         return res;
374     size = rlength;
375     if (size < 1 || size > 8) {
376         offset_t pos = url_ftell(pb);
377         av_log(matroska->ctx, AV_LOG_ERROR,
378                "Invalid uint element size %d at position %"PRId64" (0x%"PRIx64")\n",
379                 size, pos, pos);
380         return AVERROR_INVALIDDATA;
381     }
382
383     /* big-endian ordening; build up number */
384     *num = 0;
385     while (n++ < size)
386         *num = (*num << 8) | get_byte(pb);
387
388     return 0;
389 }
390
391 /*
392  * Read the next element as a signed int.
393  * 0 is success, < 0 is failure.
394  */
395
396 static int
397 ebml_read_sint (MatroskaDemuxContext *matroska,
398                 uint32_t             *id,
399                 int64_t              *num)
400 {
401     ByteIOContext *pb = matroska->ctx->pb;
402     int size, n = 1, negative = 0, res;
403     uint64_t rlength;
404
405     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
406         (res = ebml_read_element_length(matroska, &rlength)) < 0)
407         return res;
408     size = rlength;
409     if (size < 1 || size > 8) {
410         offset_t pos = url_ftell(pb);
411         av_log(matroska->ctx, AV_LOG_ERROR,
412                "Invalid sint element size %d at position %"PRId64" (0x%"PRIx64")\n",
413                 size, pos, pos);
414         return AVERROR_INVALIDDATA;
415     }
416     if ((*num = get_byte(pb)) & 0x80) {
417         negative = 1;
418         *num &= ~0x80;
419     }
420     while (n++ < size)
421         *num = (*num << 8) | get_byte(pb);
422
423     /* make signed */
424     if (negative)
425         *num = *num - (1LL << ((8 * size) - 1));
426
427     return 0;
428 }
429
430 /*
431  * Read the next element as a float.
432  * 0 is success, < 0 is failure.
433  */
434
435 static int
436 ebml_read_float (MatroskaDemuxContext *matroska,
437                  uint32_t             *id,
438                  double               *num)
439 {
440     ByteIOContext *pb = matroska->ctx->pb;
441     int size, res;
442     uint64_t rlength;
443
444     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
445         (res = ebml_read_element_length(matroska, &rlength)) < 0)
446         return res;
447     size = rlength;
448
449     if (size == 4) {
450         *num= av_int2flt(get_be32(pb));
451     } else if(size==8){
452         *num= av_int2dbl(get_be64(pb));
453     } else{
454         offset_t pos = url_ftell(pb);
455         av_log(matroska->ctx, AV_LOG_ERROR,
456                "Invalid float element size %d at position %"PRIu64" (0x%"PRIx64")\n",
457                size, pos, pos);
458         return AVERROR_INVALIDDATA;
459     }
460
461     return 0;
462 }
463
464 /*
465  * Read the next element as an ASCII string.
466  * 0 is success, < 0 is failure.
467  */
468
469 static int
470 ebml_read_ascii (MatroskaDemuxContext *matroska,
471                  uint32_t             *id,
472                  char                **str)
473 {
474     ByteIOContext *pb = matroska->ctx->pb;
475     int size, res;
476     uint64_t rlength;
477
478     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
479         (res = ebml_read_element_length(matroska, &rlength)) < 0)
480         return res;
481     size = rlength;
482
483     /* ebml strings are usually not 0-terminated, so we allocate one
484      * byte more, read the string and NULL-terminate it ourselves. */
485     if (size < 0 || !(*str = av_malloc(size + 1))) {
486         av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
487         return AVERROR(ENOMEM);
488     }
489     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
490         offset_t pos = url_ftell(pb);
491         av_log(matroska->ctx, AV_LOG_ERROR,
492                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
493         return AVERROR(EIO);
494     }
495     (*str)[size] = '\0';
496
497     return 0;
498 }
499
500 /*
501  * Read the next element as a UTF-8 string.
502  * 0 is success, < 0 is failure.
503  */
504
505 static int
506 ebml_read_utf8 (MatroskaDemuxContext *matroska,
507                 uint32_t             *id,
508                 char                **str)
509 {
510   return ebml_read_ascii(matroska, id, str);
511 }
512
513 /*
514  * Read the next element as a date (nanoseconds since 1/1/2000).
515  * 0 is success, < 0 is failure.
516  */
517
518 static int
519 ebml_read_date (MatroskaDemuxContext *matroska,
520                 uint32_t             *id,
521                 int64_t              *date)
522 {
523   return ebml_read_sint(matroska, id, date);
524 }
525
526 /*
527  * Read the next element, but only the header. The contents
528  * are supposed to be sub-elements which can be read separately.
529  * 0 is success, < 0 is failure.
530  */
531
532 static int
533 ebml_read_master (MatroskaDemuxContext *matroska,
534                   uint32_t             *id)
535 {
536     ByteIOContext *pb = matroska->ctx->pb;
537     uint64_t length;
538     MatroskaLevel *level;
539     int res;
540
541     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
542         (res = ebml_read_element_length(matroska, &length)) < 0)
543         return res;
544
545     /* protect... (Heaven forbids that the '>' is true) */
546     if (matroska->num_levels >= EBML_MAX_DEPTH) {
547         av_log(matroska->ctx, AV_LOG_ERROR,
548                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
549         return AVERROR(ENOSYS);
550     }
551
552     /* remember level */
553     level = &matroska->levels[matroska->num_levels++];
554     level->start = url_ftell(pb);
555     level->length = length;
556
557     return 0;
558 }
559
560 /*
561  * Read the next element as binary data.
562  * 0 is success, < 0 is failure.
563  */
564
565 static int
566 ebml_read_binary (MatroskaDemuxContext *matroska,
567                   uint32_t             *id,
568                   uint8_t             **binary,
569                   int                  *size)
570 {
571     ByteIOContext *pb = matroska->ctx->pb;
572     uint64_t rlength;
573     int res;
574
575     if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
576         (res = ebml_read_element_length(matroska, &rlength)) < 0)
577         return res;
578     *size = rlength;
579
580     if (!(*binary = av_malloc(*size))) {
581         av_log(matroska->ctx, AV_LOG_ERROR,
582                "Memory allocation error\n");
583         return AVERROR(ENOMEM);
584     }
585
586     if (get_buffer(pb, *binary, *size) != *size) {
587         offset_t pos = url_ftell(pb);
588         av_log(matroska->ctx, AV_LOG_ERROR,
589                "Read error at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
590         return AVERROR(EIO);
591     }
592
593     return 0;
594 }
595
596 /*
597  * Read signed/unsigned "EBML" numbers.
598  * Return: number of bytes processed, < 0 on error.
599  * XXX: use ebml_read_num().
600  */
601
602 static int
603 matroska_ebmlnum_uint (uint8_t  *data,
604                        uint32_t  size,
605                        uint64_t *num)
606 {
607     int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
608     uint64_t total;
609
610     if (size <= 0)
611         return AVERROR_INVALIDDATA;
612
613     total = data[0];
614     while (read <= 8 && !(total & len_mask)) {
615         read++;
616         len_mask >>= 1;
617     }
618     if (read > 8)
619         return AVERROR_INVALIDDATA;
620
621     if ((total &= (len_mask - 1)) == len_mask - 1)
622         num_ffs++;
623     if (size < read)
624         return AVERROR_INVALIDDATA;
625     while (n < read) {
626         if (data[n] == 0xff)
627             num_ffs++;
628         total = (total << 8) | data[n];
629         n++;
630     }
631
632     if (read == num_ffs)
633         *num = (uint64_t)-1;
634     else
635         *num = total;
636
637     return read;
638 }
639
/*
 * Decode a signed "EBML number" from a memory buffer of 'size' bytes.
 * The number is decoded as unsigned first and then shifted down by
 * 2^(7*len-1) - 1, where len is the encoded length in bytes. The
 * all-ones value is mapped to INT64_MAX.
 * Return: number of bytes processed, < 0 on error.
 */

static int
matroska_ebmlnum_sint (uint8_t  *data,
                       uint32_t  size,
                       int64_t  *num)
{
    uint64_t raw;
    int len = matroska_ebmlnum_uint(data, size, &raw);

    if (len < 0)
        return len;

    /* make signed (weird way) */
    if (raw != (uint64_t)-1) {
        const int64_t bias = (1LL << ((7 * len) - 1)) - 1;
        *num = raw - bias;
    } else
        *num = INT64_MAX;

    return len;
}
664
665 /*
666  * Read an EBML header.
667  * 0 is success, < 0 is failure.
668  */
669
670 static int
671 ebml_read_header (MatroskaDemuxContext *matroska,
672                   char                **doctype,
673                   int                  *version)
674 {
675     uint32_t id;
676     int level_up, res = 0;
677
678     /* default init */
679     if (doctype)
680         *doctype = NULL;
681     if (version)
682         *version = 1;
683
684     if (!(id = ebml_peek_id(matroska, &level_up)) ||
685         level_up != 0 || id != EBML_ID_HEADER) {
686         av_log(matroska->ctx, AV_LOG_ERROR,
687                "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
688         return AVERROR_INVALIDDATA;
689     }
690     if ((res = ebml_read_master(matroska, &id)) < 0)
691         return res;
692
693     while (res == 0) {
694         if (!(id = ebml_peek_id(matroska, &level_up)))
695             return AVERROR(EIO);
696
697         /* end-of-header */
698         if (level_up)
699             break;
700
701         switch (id) {
702             /* is our read version uptodate? */
703             case EBML_ID_EBMLREADVERSION: {
704                 uint64_t num;
705
706                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
707                     return res;
708                 if (num > EBML_VERSION) {
709                     av_log(matroska->ctx, AV_LOG_ERROR,
710                            "EBML version %"PRIu64" (> %d) is not supported\n",
711                            num, EBML_VERSION);
712                     return AVERROR_INVALIDDATA;
713                 }
714                 break;
715             }
716
717             /* we only handle 8 byte lengths at max */
718             case EBML_ID_EBMLMAXSIZELENGTH: {
719                 uint64_t num;
720
721                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
722                     return res;
723                 if (num > sizeof(uint64_t)) {
724                     av_log(matroska->ctx, AV_LOG_ERROR,
725                            "Integers of size %"PRIu64" (> %zd) not supported\n",
726                            num, sizeof(uint64_t));
727                     return AVERROR_INVALIDDATA;
728                 }
729                 break;
730             }
731
732             /* we handle 4 byte IDs at max */
733             case EBML_ID_EBMLMAXIDLENGTH: {
734                 uint64_t num;
735
736                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
737                     return res;
738                 if (num > sizeof(uint32_t)) {
739                     av_log(matroska->ctx, AV_LOG_ERROR,
740                            "IDs of size %"PRIu64" (> %zu) not supported\n",
741                             num, sizeof(uint32_t));
742                     return AVERROR_INVALIDDATA;
743                 }
744                 break;
745             }
746
747             case EBML_ID_DOCTYPE: {
748                 char *text;
749
750                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
751                     return res;
752                 if (doctype) {
753                     if (*doctype)
754                         av_free(*doctype);
755                     *doctype = text;
756                 } else
757                     av_free(text);
758                 break;
759             }
760
761             case EBML_ID_DOCTYPEREADVERSION: {
762                 uint64_t num;
763
764                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
765                     return res;
766                 if (version)
767                     *version = num;
768                 break;
769             }
770
771             default:
772                 av_log(matroska->ctx, AV_LOG_INFO,
773                        "Unknown data type 0x%x in EBML header", id);
774                 /* pass-through */
775
776             case EBML_ID_VOID:
777             /* we ignore these two, as they don't tell us anything we
778              * care about */
779             case EBML_ID_EBMLVERSION:
780             case EBML_ID_DOCTYPEVERSION:
781                 res = ebml_read_skip (matroska);
782                 break;
783         }
784     }
785
786     return 0;
787 }
788
789
790 static int
791 matroska_find_track_by_num (MatroskaDemuxContext *matroska,
792                             int                   num)
793 {
794     int i;
795
796     for (i = 0; i < matroska->num_tracks; i++)
797         if (matroska->tracks[i]->num == num)
798             return i;
799
800     return -1;
801 }
802
803
804 /*
805  * Put one packet in an application-supplied AVPacket struct.
806  * Returns 0 on success or -1 on failure.
807  */
808
809 static int
810 matroska_deliver_packet (MatroskaDemuxContext *matroska,
811                          AVPacket             *pkt)
812 {
813     if (matroska->num_packets > 0) {
814         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
815         av_free(matroska->packets[0]);
816         if (matroska->num_packets > 1) {
817             memmove(&matroska->packets[0], &matroska->packets[1],
818                     (matroska->num_packets - 1) * sizeof(AVPacket *));
819             matroska->packets =
820                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
821                            sizeof(AVPacket *));
822         } else {
823             av_freep(&matroska->packets);
824         }
825         matroska->num_packets--;
826         return 0;
827     }
828
829     return -1;
830 }
831
832 /*
833  * Put a packet into our internal queue. Will be delivered to the
834  * user/application during the next get_packet() call.
835  */
836
837 static void
838 matroska_queue_packet (MatroskaDemuxContext *matroska,
839                        AVPacket             *pkt)
840 {
841     matroska->packets =
842         av_realloc(matroska->packets, (matroska->num_packets + 1) *
843                    sizeof(AVPacket *));
844     matroska->packets[matroska->num_packets] = pkt;
845     matroska->num_packets++;
846 }
847
848 /*
849  * Free all packets in our internal queue.
850  */
851 static void
852 matroska_clear_queue (MatroskaDemuxContext *matroska)
853 {
854     if (matroska->packets) {
855         int n;
856         for (n = 0; n < matroska->num_packets; n++) {
857             av_free_packet(matroska->packets[n]);
858             av_free(matroska->packets[n]);
859         }
860         av_free(matroska->packets);
861         matroska->packets = NULL;
862         matroska->num_packets = 0;
863     }
864 }
865
866
867 /*
868  * Autodetecting...
869  */
870
871 static int
872 matroska_probe (AVProbeData *p)
873 {
874     uint64_t total = 0;
875     int len_mask = 0x80, size = 1, n = 1;
876     uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
877
878     /* ebml header? */
879     if (AV_RB32(p->buf) != EBML_ID_HEADER)
880         return 0;
881
882     /* length of header */
883     total = p->buf[4];
884     while (size <= 8 && !(total & len_mask)) {
885         size++;
886         len_mask >>= 1;
887     }
888     if (size > 8)
889       return 0;
890     total &= (len_mask - 1);
891     while (n < size)
892         total = (total << 8) | p->buf[4 + n++];
893
894     /* does the probe data contain the whole header? */
895     if (p->buf_size < 4 + size + total)
896       return 0;
897
898     /* the header must contain the document type 'matroska'. For now,
899      * we don't parse the whole header but simply check for the
900      * availability of that array of characters inside the header.
901      * Not fully fool-proof, but good enough. */
902     for (n = 4 + size; n <= 4 + size + total - sizeof(probe_data); n++)
903         if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
904             return AVPROBE_SCORE_MAX;
905
906     return 0;
907 }
908
909 /*
910  * From here on, it's all XML-style DTD stuff... Needs no comments.
911  */
912
913 static int
914 matroska_parse_info (MatroskaDemuxContext *matroska)
915 {
916     int res = 0;
917     uint32_t id;
918
919     av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
920
921     while (res == 0) {
922         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
923             res = AVERROR(EIO);
924             break;
925         } else if (matroska->level_up) {
926             matroska->level_up--;
927             break;
928         }
929
930         switch (id) {
931             /* cluster timecode */
932             case MATROSKA_ID_TIMECODESCALE: {
933                 uint64_t num;
934                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
935                     break;
936                 matroska->time_scale = num;
937                 break;
938             }
939
940             case MATROSKA_ID_DURATION: {
941                 double num;
942                 if ((res = ebml_read_float(matroska, &id, &num)) < 0)
943                     break;
944                 matroska->ctx->duration = num * matroska->time_scale * 1000 / AV_TIME_BASE;
945                 break;
946             }
947
948             case MATROSKA_ID_TITLE: {
949                 char *text;
950                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
951                     break;
952                 strncpy(matroska->ctx->title, text,
953                         sizeof(matroska->ctx->title)-1);
954                 av_free(text);
955                 break;
956             }
957
958             case MATROSKA_ID_WRITINGAPP: {
959                 char *text;
960                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
961                     break;
962                 matroska->writing_app = text;
963                 break;
964             }
965
966             case MATROSKA_ID_MUXINGAPP: {
967                 char *text;
968                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
969                     break;
970                 matroska->muxing_app = text;
971                 break;
972             }
973
974             case MATROSKA_ID_DATEUTC: {
975                 int64_t time;
976                 if ((res = ebml_read_date(matroska, &id, &time)) < 0)
977                     break;
978                 matroska->created = time;
979                 break;
980             }
981
982             default:
983                 av_log(matroska->ctx, AV_LOG_INFO,
984                        "Unknown entry 0x%x in info header\n", id);
985                 /* fall-through */
986
987             case EBML_ID_VOID:
988                 res = ebml_read_skip(matroska);
989                 break;
990         }
991
992         if (matroska->level_up) {
993             matroska->level_up--;
994             break;
995         }
996     }
997
998     return res;
999 }
1000
1001 static int
1002 matroska_add_stream (MatroskaDemuxContext *matroska)
1003 {
1004     int res = 0;
1005     uint32_t id;
1006     MatroskaTrack *track;
1007
1008     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1009
1010     /* Allocate a generic track. As soon as we know its type we'll realloc. */
1011     track = av_mallocz(MAX_TRACK_SIZE);
1012     matroska->num_tracks++;
1013     strcpy(track->language, "eng");
1014
1015     /* start with the master */
1016     if ((res = ebml_read_master(matroska, &id)) < 0)
1017         return res;
1018
1019     /* try reading the trackentry headers */
1020     while (res == 0) {
1021         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1022             res = AVERROR(EIO);
1023             break;
1024         } else if (matroska->level_up > 0) {
1025             matroska->level_up--;
1026             break;
1027         }
1028
1029         switch (id) {
1030             /* track number (unique stream ID) */
1031             case MATROSKA_ID_TRACKNUMBER: {
1032                 uint64_t num;
1033                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1034                     break;
1035                 track->num = num;
1036                 break;
1037             }
1038
1039             /* track UID (unique identifier) */
1040             case MATROSKA_ID_TRACKUID: {
1041                 uint64_t num;
1042                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1043                     break;
1044                 track->uid = num;
1045                 break;
1046             }
1047
1048             /* track type (video, audio, combined, subtitle, etc.) */
1049             case MATROSKA_ID_TRACKTYPE: {
1050                 uint64_t num;
1051                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1052                     break;
1053                 if (track->type && track->type != num) {
1054                     av_log(matroska->ctx, AV_LOG_INFO,
1055                            "More than one tracktype in an entry - skip\n");
1056                     break;
1057                 }
1058                 track->type = num;
1059
1060                 switch (track->type) {
1061                     case MATROSKA_TRACK_TYPE_VIDEO:
1062                     case MATROSKA_TRACK_TYPE_AUDIO:
1063                     case MATROSKA_TRACK_TYPE_SUBTITLE:
1064                         break;
1065                     case MATROSKA_TRACK_TYPE_COMPLEX:
1066                     case MATROSKA_TRACK_TYPE_LOGO:
1067                     case MATROSKA_TRACK_TYPE_CONTROL:
1068                     default:
1069                         av_log(matroska->ctx, AV_LOG_INFO,
1070                                "Unknown or unsupported track type 0x%x\n",
1071                                track->type);
1072                         track->type = 0;
1073                         break;
1074                 }
1075                 matroska->tracks[matroska->num_tracks - 1] = track;
1076                 break;
1077             }
1078
1079             /* tracktype specific stuff for video */
1080             case MATROSKA_ID_TRACKVIDEO: {
1081                 MatroskaVideoTrack *videotrack;
1082                 if (!track->type)
1083                     track->type = MATROSKA_TRACK_TYPE_VIDEO;
1084                 if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1085                     av_log(matroska->ctx, AV_LOG_INFO,
1086                            "video data in non-video track - ignoring\n");
1087                     res = AVERROR_INVALIDDATA;
1088                     break;
1089                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1090                     break;
1091                 videotrack = (MatroskaVideoTrack *)track;
1092
1093                 while (res == 0) {
1094                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1095                         res = AVERROR(EIO);
1096                         break;
1097                     } else if (matroska->level_up > 0) {
1098                         matroska->level_up--;
1099                         break;
1100                     }
1101
1102                     switch (id) {
1103                         /* fixme, this should be one-up, but I get it here */
1104                         case MATROSKA_ID_TRACKDEFAULTDURATION: {
1105                             uint64_t num;
1106                             if ((res = ebml_read_uint (matroska, &id,
1107                                                        &num)) < 0)
1108                                 break;
1109                             track->default_duration = num;
1110                             break;
1111                         }
1112
1113                         /* video framerate */
1114                         case MATROSKA_ID_VIDEOFRAMERATE: {
1115                             double num;
1116                             if ((res = ebml_read_float(matroska, &id,
1117                                                        &num)) < 0)
1118                                 break;
1119                             if (!track->default_duration)
1120                                 track->default_duration = 1000000000/num;
1121                             break;
1122                         }
1123
1124                         /* width of the size to display the video at */
1125                         case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1126                             uint64_t num;
1127                             if ((res = ebml_read_uint(matroska, &id,
1128                                                       &num)) < 0)
1129                                 break;
1130                             videotrack->display_width = num;
1131                             break;
1132                         }
1133
1134                         /* height of the size to display the video at */
1135                         case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1136                             uint64_t num;
1137                             if ((res = ebml_read_uint(matroska, &id,
1138                                                       &num)) < 0)
1139                                 break;
1140                             videotrack->display_height = num;
1141                             break;
1142                         }
1143
1144                         /* width of the video in the file */
1145                         case MATROSKA_ID_VIDEOPIXELWIDTH: {
1146                             uint64_t num;
1147                             if ((res = ebml_read_uint(matroska, &id,
1148                                                       &num)) < 0)
1149                                 break;
1150                             videotrack->pixel_width = num;
1151                             break;
1152                         }
1153
1154                         /* height of the video in the file */
1155                         case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1156                             uint64_t num;
1157                             if ((res = ebml_read_uint(matroska, &id,
1158                                                       &num)) < 0)
1159                                 break;
1160                             videotrack->pixel_height = num;
1161                             break;
1162                         }
1163
1164                         /* whether the video is interlaced */
1165                         case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1166                             uint64_t num;
1167                             if ((res = ebml_read_uint(matroska, &id,
1168                                                       &num)) < 0)
1169                                 break;
1170                             if (num)
1171                                 track->flags |=
1172                                     MATROSKA_VIDEOTRACK_INTERLACED;
1173                             else
1174                                 track->flags &=
1175                                     ~MATROSKA_VIDEOTRACK_INTERLACED;
1176                             break;
1177                         }
1178
1179                         /* stereo mode (whether the video has two streams,
1180                          * where one is for the left eye and the other for
1181                          * the right eye, which creates a 3D-like
1182                          * effect) */
1183                         case MATROSKA_ID_VIDEOSTEREOMODE: {
1184                             uint64_t num;
1185                             if ((res = ebml_read_uint(matroska, &id,
1186                                                       &num)) < 0)
1187                                 break;
1188                             if (num != MATROSKA_EYE_MODE_MONO &&
1189                                 num != MATROSKA_EYE_MODE_LEFT &&
1190                                 num != MATROSKA_EYE_MODE_RIGHT &&
1191                                 num != MATROSKA_EYE_MODE_BOTH) {
1192                                 av_log(matroska->ctx, AV_LOG_INFO,
1193                                        "Ignoring unknown eye mode 0x%x\n",
1194                                        (uint32_t) num);
1195                                 break;
1196                             }
1197                             videotrack->eye_mode = num;
1198                             break;
1199                         }
1200
1201                         /* aspect ratio behaviour */
1202                         case MATROSKA_ID_VIDEOASPECTRATIO: {
1203                             uint64_t num;
1204                             if ((res = ebml_read_uint(matroska, &id,
1205                                                       &num)) < 0)
1206                                 break;
1207                             if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1208                                 num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1209                                 num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1210                                 av_log(matroska->ctx, AV_LOG_INFO,
1211                                        "Ignoring unknown aspect ratio 0x%x\n",
1212                                        (uint32_t) num);
1213                                 break;
1214                             }
1215                             videotrack->ar_mode = num;
1216                             break;
1217                         }
1218
1219                         /* colorspace (only matters for raw video)
1220                          * fourcc */
1221                         case MATROSKA_ID_VIDEOCOLORSPACE: {
1222                             uint64_t num;
1223                             if ((res = ebml_read_uint(matroska, &id,
1224                                                       &num)) < 0)
1225                                 break;
1226                             videotrack->fourcc = num;
1227                             break;
1228                         }
1229
1230                         default:
1231                             av_log(matroska->ctx, AV_LOG_INFO,
1232                                    "Unknown video track header entry "
1233                                    "0x%x - ignoring\n", id);
1234                             /* pass-through */
1235
1236                         case EBML_ID_VOID:
1237                             res = ebml_read_skip(matroska);
1238                             break;
1239                     }
1240
1241                     if (matroska->level_up) {
1242                         matroska->level_up--;
1243                         break;
1244                     }
1245                 }
1246                 break;
1247             }
1248
1249             /* tracktype specific stuff for audio */
1250             case MATROSKA_ID_TRACKAUDIO: {
1251                 MatroskaAudioTrack *audiotrack;
1252                 if (!track->type)
1253                     track->type = MATROSKA_TRACK_TYPE_AUDIO;
1254                 if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1255                     av_log(matroska->ctx, AV_LOG_INFO,
1256                            "audio data in non-audio track - ignoring\n");
1257                     res = AVERROR_INVALIDDATA;
1258                     break;
1259                 } else if ((res = ebml_read_master(matroska, &id)) < 0)
1260                     break;
1261                 audiotrack = (MatroskaAudioTrack *)track;
1262                 audiotrack->channels = 1;
1263                 audiotrack->samplerate = 8000;
1264
1265                 while (res == 0) {
1266                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1267                         res = AVERROR(EIO);
1268                         break;
1269                     } else if (matroska->level_up > 0) {
1270                         matroska->level_up--;
1271                         break;
1272                     }
1273
1274                     switch (id) {
1275                         /* samplerate */
1276                         case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1277                             double num;
1278                             if ((res = ebml_read_float(matroska, &id,
1279                                                        &num)) < 0)
1280                                 break;
1281                             audiotrack->internal_samplerate =
1282                             audiotrack->samplerate = num;
1283                             break;
1284                         }
1285
1286                         case MATROSKA_ID_AUDIOOUTSAMPLINGFREQ: {
1287                             double num;
1288                             if ((res = ebml_read_float(matroska, &id,
1289                                                        &num)) < 0)
1290                                 break;
1291                             audiotrack->samplerate = num;
1292                             break;
1293                         }
1294
1295                             /* bitdepth */
1296                         case MATROSKA_ID_AUDIOBITDEPTH: {
1297                             uint64_t num;
1298                             if ((res = ebml_read_uint(matroska, &id,
1299                                                       &num)) < 0)
1300                                 break;
1301                             audiotrack->bitdepth = num;
1302                             break;
1303                         }
1304
1305                             /* channels */
1306                         case MATROSKA_ID_AUDIOCHANNELS: {
1307                             uint64_t num;
1308                             if ((res = ebml_read_uint(matroska, &id,
1309                                                       &num)) < 0)
1310                                 break;
1311                             audiotrack->channels = num;
1312                             break;
1313                         }
1314
1315                         default:
1316                             av_log(matroska->ctx, AV_LOG_INFO,
1317                                    "Unknown audio track header entry "
1318                                    "0x%x - ignoring\n", id);
1319                             /* pass-through */
1320
1321                         case EBML_ID_VOID:
1322                             res = ebml_read_skip(matroska);
1323                             break;
1324                     }
1325
1326                     if (matroska->level_up) {
1327                         matroska->level_up--;
1328                         break;
1329                     }
1330                 }
1331                 break;
1332             }
1333
1334                 /* codec identifier */
1335             case MATROSKA_ID_CODECID: {
1336                 char *text;
1337                 if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1338                     break;
1339                 track->codec_id = text;
1340                 break;
1341             }
1342
1343                 /* codec private data */
1344             case MATROSKA_ID_CODECPRIVATE: {
1345                 uint8_t *data;
1346                 int size;
1347                 if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1348                     break;
1349                 track->codec_priv = data;
1350                 track->codec_priv_size = size;
1351                 break;
1352             }
1353
1354                 /* name of the codec */
1355             case MATROSKA_ID_CODECNAME: {
1356                 char *text;
1357                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1358                     break;
1359                 track->codec_name = text;
1360                 break;
1361             }
1362
1363                 /* name of this track */
1364             case MATROSKA_ID_TRACKNAME: {
1365                 char *text;
1366                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1367                     break;
1368                 track->name = text;
1369                 break;
1370             }
1371
1372                 /* language (matters for audio/subtitles, mostly) */
1373             case MATROSKA_ID_TRACKLANGUAGE: {
1374                 char *text, *end;
1375                 if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1376                     break;
1377                 if ((end = strchr(text, '-')))
1378                     *end = '\0';
1379                 if (strlen(text) == 3)
1380                     strcpy(track->language, text);
1381                 av_free(text);
1382                 break;
1383             }
1384
1385                 /* whether this is actually used */
1386             case MATROSKA_ID_TRACKFLAGENABLED: {
1387                 uint64_t num;
1388                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1389                     break;
1390                 if (num)
1391                     track->flags |= MATROSKA_TRACK_ENABLED;
1392                 else
1393                     track->flags &= ~MATROSKA_TRACK_ENABLED;
1394                 break;
1395             }
1396
1397                 /* whether it's the default for this track type */
1398             case MATROSKA_ID_TRACKFLAGDEFAULT: {
1399                 uint64_t num;
1400                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1401                     break;
1402                 if (num)
1403                     track->flags |= MATROSKA_TRACK_DEFAULT;
1404                 else
1405                     track->flags &= ~MATROSKA_TRACK_DEFAULT;
1406                 break;
1407             }
1408
1409                 /* lacing (like MPEG, where blocks don't end/start on frame
1410                  * boundaries) */
1411             case MATROSKA_ID_TRACKFLAGLACING: {
1412                 uint64_t num;
1413                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1414                     break;
1415                 if (num)
1416                     track->flags |= MATROSKA_TRACK_LACING;
1417                 else
1418                     track->flags &= ~MATROSKA_TRACK_LACING;
1419                 break;
1420             }
1421
1422                 /* default length (in time) of one data block in this track */
1423             case MATROSKA_ID_TRACKDEFAULTDURATION: {
1424                 uint64_t num;
1425                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1426                     break;
1427                 track->default_duration = num;
1428                 break;
1429             }
1430
1431             default:
1432                 av_log(matroska->ctx, AV_LOG_INFO,
1433                        "Unknown track header entry 0x%x - ignoring\n", id);
1434                 /* pass-through */
1435
1436             case EBML_ID_VOID:
1437             /* we ignore these because they're nothing useful. */
1438             case MATROSKA_ID_CODECINFOURL:
1439             case MATROSKA_ID_CODECDOWNLOADURL:
1440             case MATROSKA_ID_TRACKMINCACHE:
1441             case MATROSKA_ID_TRACKMAXCACHE:
1442                 res = ebml_read_skip(matroska);
1443                 break;
1444         }
1445
1446         if (matroska->level_up) {
1447             matroska->level_up--;
1448             break;
1449         }
1450     }
1451
1452     return res;
1453 }
1454
1455 static int
1456 matroska_parse_tracks (MatroskaDemuxContext *matroska)
1457 {
1458     int res = 0;
1459     uint32_t id;
1460
1461     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1462
1463     while (res == 0) {
1464         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1465             res = AVERROR(EIO);
1466             break;
1467         } else if (matroska->level_up) {
1468             matroska->level_up--;
1469             break;
1470         }
1471
1472         switch (id) {
1473             /* one track within the "all-tracks" header */
1474             case MATROSKA_ID_TRACKENTRY:
1475                 res = matroska_add_stream(matroska);
1476                 break;
1477
1478             default:
1479                 av_log(matroska->ctx, AV_LOG_INFO,
1480                        "Unknown entry 0x%x in track header\n", id);
1481                 /* fall-through */
1482
1483             case EBML_ID_VOID:
1484                 res = ebml_read_skip(matroska);
1485                 break;
1486         }
1487
1488         if (matroska->level_up) {
1489             matroska->level_up--;
1490             break;
1491         }
1492     }
1493
1494     return res;
1495 }
1496
1497 static int
1498 matroska_parse_index (MatroskaDemuxContext *matroska)
1499 {
1500     int res = 0;
1501     uint32_t id;
1502     MatroskaDemuxIndex idx;
1503
1504     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1505
1506     while (res == 0) {
1507         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1508             res = AVERROR(EIO);
1509             break;
1510         } else if (matroska->level_up) {
1511             matroska->level_up--;
1512             break;
1513         }
1514
1515         switch (id) {
1516             /* one single index entry ('point') */
1517             case MATROSKA_ID_POINTENTRY:
1518                 if ((res = ebml_read_master(matroska, &id)) < 0)
1519                     break;
1520
1521                 /* in the end, we hope to fill one entry with a
1522                  * timestamp, a file position and a tracknum */
1523                 idx.pos   = (uint64_t) -1;
1524                 idx.time  = (uint64_t) -1;
1525                 idx.track = (uint16_t) -1;
1526
1527                 while (res == 0) {
1528                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1529                         res = AVERROR(EIO);
1530                         break;
1531                     } else if (matroska->level_up) {
1532                         matroska->level_up--;
1533                         break;
1534                     }
1535
1536                     switch (id) {
1537                         /* one single index entry ('point') */
1538                         case MATROSKA_ID_CUETIME: {
1539                             uint64_t time;
1540                             if ((res = ebml_read_uint(matroska, &id,
1541                                                       &time)) < 0)
1542                                 break;
1543                             idx.time = time * matroska->time_scale;
1544                             break;
1545                         }
1546
1547                         /* position in the file + track to which it
1548                          * belongs */
1549                         case MATROSKA_ID_CUETRACKPOSITION:
1550                             if ((res = ebml_read_master(matroska, &id)) < 0)
1551                                 break;
1552
1553                             while (res == 0) {
1554                                 if (!(id = ebml_peek_id (matroska,
1555                                                     &matroska->level_up))) {
1556                                     res = AVERROR(EIO);
1557                                     break;
1558                                 } else if (matroska->level_up) {
1559                                     matroska->level_up--;
1560                                     break;
1561                                 }
1562
1563                                 switch (id) {
1564                                     /* track number */
1565                                     case MATROSKA_ID_CUETRACK: {
1566                                         uint64_t num;
1567                                         if ((res = ebml_read_uint(matroska,
1568                                                           &id, &num)) < 0)
1569                                             break;
1570                                         idx.track = num;
1571                                         break;
1572                                     }
1573
1574                                         /* position in file */
1575                                     case MATROSKA_ID_CUECLUSTERPOSITION: {
1576                                         uint64_t num;
1577                                         if ((res = ebml_read_uint(matroska,
1578                                                           &id, &num)) < 0)
1579                                             break;
1580                                         idx.pos = num+matroska->segment_start;
1581                                         break;
1582                                     }
1583
1584                                     default:
1585                                         av_log(matroska->ctx, AV_LOG_INFO,
1586                                                "Unknown entry 0x%x in "
1587                                                "CuesTrackPositions\n", id);
1588                                         /* fall-through */
1589
1590                                     case EBML_ID_VOID:
1591                                         res = ebml_read_skip(matroska);
1592                                         break;
1593                                 }
1594
1595                                 if (matroska->level_up) {
1596                                     matroska->level_up--;
1597                                     break;
1598                                 }
1599                             }
1600
1601                             break;
1602
1603                         default:
1604                             av_log(matroska->ctx, AV_LOG_INFO,
1605                                    "Unknown entry 0x%x in cuespoint "
1606                                    "index\n", id);
1607                             /* fall-through */
1608
1609                         case EBML_ID_VOID:
1610                             res = ebml_read_skip(matroska);
1611                             break;
1612                     }
1613
1614                     if (matroska->level_up) {
1615                         matroska->level_up--;
1616                         break;
1617                     }
1618                 }
1619
1620                 /* so let's see if we got what we wanted */
1621                 if (idx.pos   != (uint64_t) -1 &&
1622                     idx.time  != (uint64_t) -1 &&
1623                     idx.track != (uint16_t) -1) {
1624                     if (matroska->num_indexes % 32 == 0) {
1625                         /* re-allocate bigger index */
1626                         matroska->index =
1627                             av_realloc(matroska->index,
1628                                        (matroska->num_indexes + 32) *
1629                                        sizeof(MatroskaDemuxIndex));
1630                     }
1631                     matroska->index[matroska->num_indexes] = idx;
1632                     matroska->num_indexes++;
1633                 }
1634                 break;
1635
1636             default:
1637                 av_log(matroska->ctx, AV_LOG_INFO,
1638                        "Unknown entry 0x%x in cues header\n", id);
1639                 /* fall-through */
1640
1641             case EBML_ID_VOID:
1642                 res = ebml_read_skip(matroska);
1643                 break;
1644         }
1645
1646         if (matroska->level_up) {
1647             matroska->level_up--;
1648             break;
1649         }
1650     }
1651
1652     return res;
1653 }
1654
1655 static int
1656 matroska_parse_metadata (MatroskaDemuxContext *matroska)
1657 {
1658     int res = 0;
1659     uint32_t id;
1660
1661     while (res == 0) {
1662         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1663             res = AVERROR(EIO);
1664             break;
1665         } else if (matroska->level_up) {
1666             matroska->level_up--;
1667             break;
1668         }
1669
1670         switch (id) {
1671             /* Hm, this is unsupported... */
1672             default:
1673                 av_log(matroska->ctx, AV_LOG_INFO,
1674                        "Unknown entry 0x%x in metadata header\n", id);
1675                 /* fall-through */
1676
1677             case EBML_ID_VOID:
1678                 res = ebml_read_skip(matroska);
1679                 break;
1680         }
1681
1682         if (matroska->level_up) {
1683             matroska->level_up--;
1684             break;
1685         }
1686     }
1687
1688     return res;
1689 }
1690
1691 static int
1692 matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1693 {
1694     int res = 0;
1695     uint32_t id;
1696
1697     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1698
1699     while (res == 0) {
1700         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1701             res = AVERROR(EIO);
1702             break;
1703         } else if (matroska->level_up) {
1704             matroska->level_up--;
1705             break;
1706         }
1707
1708         switch (id) {
1709             case MATROSKA_ID_SEEKENTRY: {
1710                 uint32_t seek_id = 0, peek_id_cache = 0;
1711                 uint64_t seek_pos = (uint64_t) -1, t;
1712
1713                 if ((res = ebml_read_master(matroska, &id)) < 0)
1714                     break;
1715
1716                 while (res == 0) {
1717                     if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1718                         res = AVERROR(EIO);
1719                         break;
1720                     } else if (matroska->level_up) {
1721                         matroska->level_up--;
1722                         break;
1723                     }
1724
1725                     switch (id) {
1726                         case MATROSKA_ID_SEEKID:
1727                             res = ebml_read_uint(matroska, &id, &t);
1728                             seek_id = t;
1729                             break;
1730
1731                         case MATROSKA_ID_SEEKPOSITION:
1732                             res = ebml_read_uint(matroska, &id, &seek_pos);
1733                             break;
1734
1735                         default:
1736                             av_log(matroska->ctx, AV_LOG_INFO,
1737                                    "Unknown seekhead ID 0x%x\n", id);
1738                             /* fall-through */
1739
1740                         case EBML_ID_VOID:
1741                             res = ebml_read_skip(matroska);
1742                             break;
1743                     }
1744
1745                     if (matroska->level_up) {
1746                         matroska->level_up--;
1747                         break;
1748                     }
1749                 }
1750
1751                 if (!seek_id || seek_pos == (uint64_t) -1) {
1752                     av_log(matroska->ctx, AV_LOG_INFO,
1753                            "Incomplete seekhead entry (0x%x/%"PRIu64")\n",
1754                            seek_id, seek_pos);
1755                     break;
1756                 }
1757
1758                 switch (seek_id) {
1759                     case MATROSKA_ID_CUES:
1760                     case MATROSKA_ID_TAGS: {
1761                         uint32_t level_up = matroska->level_up;
1762                         offset_t before_pos;
1763                         uint64_t length;
1764                         MatroskaLevel level;
1765
1766                         /* remember the peeked ID and the current position */
1767                         peek_id_cache = matroska->peek_id;
1768                         before_pos = url_ftell(matroska->ctx->pb);
1769
1770                         /* seek */
1771                         if ((res = ebml_read_seek(matroska, seek_pos +
1772                                                matroska->segment_start)) < 0)
1773                             return res;
1774
1775                         /* we don't want to lose our seekhead level, so we add
1776                          * a dummy. This is a crude hack. */
1777                         if (matroska->num_levels == EBML_MAX_DEPTH) {
1778                             av_log(matroska->ctx, AV_LOG_INFO,
1779                                    "Max EBML element depth (%d) reached, "
1780                                    "cannot parse further.\n", EBML_MAX_DEPTH);
1781                             return AVERROR_UNKNOWN;
1782                         }
1783
1784                         level.start = 0;
1785                         level.length = (uint64_t)-1;
1786                         matroska->levels[matroska->num_levels] = level;
1787                         matroska->num_levels++;
1788
1789                         /* check ID */
1790                         if (!(id = ebml_peek_id (matroska,
1791                                                  &matroska->level_up)))
1792                             goto finish;
1793                         if (id != seek_id) {
1794                             av_log(matroska->ctx, AV_LOG_INFO,
1795                                    "We looked for ID=0x%x but got "
1796                                    "ID=0x%x (pos=%"PRIu64")",
1797                                    seek_id, id, seek_pos +
1798                                    matroska->segment_start);
1799                             goto finish;
1800                         }
1801
1802                         /* read master + parse */
1803                         if ((res = ebml_read_master(matroska, &id)) < 0)
1804                             goto finish;
1805                         switch (id) {
1806                             case MATROSKA_ID_CUES:
1807                                 if (!(res = matroska_parse_index(matroska)) ||
1808                                     url_feof(matroska->ctx->pb)) {
1809                                     matroska->index_parsed = 1;
1810                                     res = 0;
1811                                 }
1812                                 break;
1813                             case MATROSKA_ID_TAGS:
1814                                 if (!(res = matroska_parse_metadata(matroska)) ||
1815                                    url_feof(matroska->ctx->pb)) {
1816                                     matroska->metadata_parsed = 1;
1817                                     res = 0;
1818                                 }
1819                                 break;
1820                         }
1821
1822                     finish:
1823                         /* remove dummy level */
1824                         while (matroska->num_levels) {
1825                             matroska->num_levels--;
1826                             length =
1827                                 matroska->levels[matroska->num_levels].length;
1828                             if (length == (uint64_t)-1)
1829                                 break;
1830                         }
1831
1832                         /* seek back */
1833                         if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1834                             return res;
1835                         matroska->peek_id = peek_id_cache;
1836                         matroska->level_up = level_up;
1837                         break;
1838                     }
1839
1840                     default:
1841                         av_log(matroska->ctx, AV_LOG_INFO,
1842                                "Ignoring seekhead entry for ID=0x%x\n",
1843                                seek_id);
1844                         break;
1845                 }
1846
1847                 break;
1848             }
1849
1850             default:
1851                 av_log(matroska->ctx, AV_LOG_INFO,
1852                        "Unknown seekhead ID 0x%x\n", id);
1853                 /* fall-through */
1854
1855             case EBML_ID_VOID:
1856                 res = ebml_read_skip(matroska);
1857                 break;
1858         }
1859
1860         if (matroska->level_up) {
1861             matroska->level_up--;
1862             break;
1863         }
1864     }
1865
1866     return res;
1867 }
1868
1869 static int
1870 matroska_parse_attachments(AVFormatContext *s)
1871 {
1872     MatroskaDemuxContext *matroska = s->priv_data;
1873     int res = 0;
1874     uint32_t id;
1875
1876     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing attachments...\n");
1877
1878     while (res == 0) {
1879         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1880             res = AVERROR(EIO);
1881             break;
1882         } else if (matroska->level_up) {
1883             matroska->level_up--;
1884             break;
1885         }
1886
1887         switch (id) {
1888         case MATROSKA_ID_ATTACHEDFILE: {
1889             char* name = NULL;
1890             char* mime = NULL;
1891             uint8_t* data = NULL;
1892             int i, data_size = 0;
1893             AVStream *st;
1894
1895             if ((res = ebml_read_master(matroska, &id)) < 0)
1896                 break;
1897
1898             while (res == 0) {
1899                 if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1900                     res = AVERROR(EIO);
1901                     break;
1902                 } else if (matroska->level_up) {
1903                     matroska->level_up--;
1904                     break;
1905                 }
1906
1907                 switch (id) {
1908                 case MATROSKA_ID_FILENAME:
1909                     res = ebml_read_utf8 (matroska, &id, &name);
1910                     break;
1911
1912                 case MATROSKA_ID_FILEMIMETYPE:
1913                     res = ebml_read_ascii (matroska, &id, &mime);
1914                     break;
1915
1916                 case MATROSKA_ID_FILEDATA:
1917                     res = ebml_read_binary(matroska, &id, &data, &data_size);
1918                     break;
1919
1920                 default:
1921                     av_log(matroska->ctx, AV_LOG_INFO,
1922                            "Unknown attachedfile ID 0x%x\n", id);
1923                 case EBML_ID_VOID:
1924                     res = ebml_read_skip(matroska);
1925                     break;
1926                 }
1927
1928                 if (matroska->level_up) {
1929                     matroska->level_up--;
1930                     break;
1931                 }
1932             }
1933
1934             if (!(name && mime && data && data_size > 0)) {
1935                 av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
1936                 break;
1937             }
1938
1939             st = av_new_stream(s, matroska->num_streams++);
1940             if (st == NULL)
1941                 return AVERROR(ENOMEM);
1942             st->filename = av_strdup(name);
1943             st->codec->codec_id = CODEC_ID_NONE;
1944             st->codec->codec_type = CODEC_TYPE_ATTACHMENT;
1945             st->codec->extradata = av_malloc(data_size);
1946             if(st->codec->extradata == NULL)
1947                 return AVERROR(ENOMEM);
1948             st->codec->extradata_size = data_size;
1949             memcpy(st->codec->extradata, data, data_size);
1950
1951             for (i=0; ff_mkv_mime_tags[i].id != CODEC_ID_NONE; i++) {
1952                 if (!strncmp(ff_mkv_mime_tags[i].str, mime,
1953                              strlen(ff_mkv_mime_tags[i].str))) {
1954                     st->codec->codec_id = ff_mkv_mime_tags[i].id;
1955                     break;
1956                 }
1957             }
1958
1959             av_log(matroska->ctx, AV_LOG_DEBUG, "new attachment: %s, %s, size %d \n", name, mime, data_size);
1960             break;
1961         }
1962
1963         default:
1964             av_log(matroska->ctx, AV_LOG_INFO,
1965                    "Unknown attachments ID 0x%x\n", id);
1966             /* fall-through */
1967
1968         case EBML_ID_VOID:
1969             res = ebml_read_skip(matroska);
1970             break;
1971         }
1972
1973         if (matroska->level_up) {
1974             matroska->level_up--;
1975             break;
1976         }
1977     }
1978
1979     return res;
1980 }
1981
/* Element count of a real array; must not be used on a pointer. */
#define ARRAY_SIZE(x)  (sizeof(x) / sizeof((x)[0]))
1983
/**
 * Derive a profile number from a Matroska AAC codec ID string.
 *
 * The string is searched for "MAIN", "LC" and "SSR" in that order. The
 * 1-based position of the first name that occurs is returned; when none of
 * them occurs the result is one past the last position (4).
 */
static int
matroska_aac_profile (char *codec_id)
{
    static const char *aac_profiles[] = {
        "MAIN", "LC", "SSR"
    };
    const int count = sizeof(aac_profiles) / sizeof(aac_profiles[0]);
    int idx = 0;

    while (idx < count && !strstr(codec_id, aac_profiles[idx]))
        idx++;

    return idx + 1;
}
1997
1998 static int
1999 matroska_aac_sri (int samplerate)
2000 {
2001     int sri;
2002
2003     for (sri=0; sri<ARRAY_SIZE(ff_mpeg4audio_sample_rates); sri++)
2004         if (ff_mpeg4audio_sample_rates[sri] == samplerate)
2005             break;
2006     return sri;
2007 }
2008
2009 static int
2010 matroska_read_header (AVFormatContext    *s,
2011                       AVFormatParameters *ap)
2012 {
2013     MatroskaDemuxContext *matroska = s->priv_data;
2014     char *doctype;
2015     int version, last_level, res = 0;
2016     uint32_t id;
2017
2018     matroska->ctx = s;
2019
2020     /* First read the EBML header. */
2021     doctype = NULL;
2022     if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2023         return res;
2024     if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2025         av_log(matroska->ctx, AV_LOG_ERROR,
2026                "Wrong EBML doctype ('%s' != 'matroska').\n",
2027                doctype ? doctype : "(none)");
2028         if (doctype)
2029             av_free(doctype);
2030         return AVERROR_NOFMT;
2031     }
2032     av_free(doctype);
2033     if (version > 2) {
2034         av_log(matroska->ctx, AV_LOG_ERROR,
2035                "Matroska demuxer version 2 too old for file version %d\n",
2036                version);
2037         return AVERROR_NOFMT;
2038     }
2039
2040     /* The next thing is a segment. */
2041     while (1) {
2042         if (!(id = ebml_peek_id(matroska, &last_level)))
2043             return AVERROR(EIO);
2044         if (id == MATROSKA_ID_SEGMENT)
2045             break;
2046
2047         /* oi! */
2048         av_log(matroska->ctx, AV_LOG_INFO,
2049                "Expected a Segment ID (0x%x), but received 0x%x!\n",
2050                MATROSKA_ID_SEGMENT, id);
2051         if ((res = ebml_read_skip(matroska)) < 0)
2052             return res;
2053     }
2054
2055     /* We now have a Matroska segment.
2056      * Seeks are from the beginning of the segment,
2057      * after the segment ID/length. */
2058     if ((res = ebml_read_master(matroska, &id)) < 0)
2059         return res;
2060     matroska->segment_start = url_ftell(s->pb);
2061
2062     matroska->time_scale = 1000000;
2063     /* we've found our segment, start reading the different contents in here */
2064     while (res == 0) {
2065         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2066             res = AVERROR(EIO);
2067             break;
2068         } else if (matroska->level_up) {
2069             matroska->level_up--;
2070             break;
2071         }
2072
2073         switch (id) {
2074             /* stream info */
2075             case MATROSKA_ID_INFO: {
2076                 if ((res = ebml_read_master(matroska, &id)) < 0)
2077                     break;
2078                 res = matroska_parse_info(matroska);
2079                 break;
2080             }
2081
2082             /* track info headers */
2083             case MATROSKA_ID_TRACKS: {
2084                 if ((res = ebml_read_master(matroska, &id)) < 0)
2085                     break;
2086                 res = matroska_parse_tracks(matroska);
2087                 break;
2088             }
2089
2090             /* stream index */
2091             case MATROSKA_ID_CUES: {
2092                 if (!matroska->index_parsed) {
2093                     if ((res = ebml_read_master(matroska, &id)) < 0)
2094                         break;
2095                     res = matroska_parse_index(matroska);
2096                 } else
2097                     res = ebml_read_skip(matroska);
2098                 break;
2099             }
2100
2101             /* metadata */
2102             case MATROSKA_ID_TAGS: {
2103                 if (!matroska->metadata_parsed) {
2104                     if ((res = ebml_read_master(matroska, &id)) < 0)
2105                         break;
2106                     res = matroska_parse_metadata(matroska);
2107                 } else
2108                     res = ebml_read_skip(matroska);
2109                 break;
2110             }
2111
2112             /* file index (if seekable, seek to Cues/Tags to parse it) */
2113             case MATROSKA_ID_SEEKHEAD: {
2114                 if ((res = ebml_read_master(matroska, &id)) < 0)
2115                     break;
2116                 res = matroska_parse_seekhead(matroska);
2117                 break;
2118             }
2119
2120             case MATROSKA_ID_ATTACHMENTS: {
2121                 if ((res = ebml_read_master(matroska, &id)) < 0)
2122                     break;
2123                 res = matroska_parse_attachments(s);
2124                 break;
2125             }
2126
2127             case MATROSKA_ID_CLUSTER: {
2128                 /* Do not read the master - this will be done in the next
2129                  * call to matroska_read_packet. */
2130                 res = 1;
2131                 break;
2132             }
2133
2134             default:
2135                 av_log(matroska->ctx, AV_LOG_INFO,
2136                        "Unknown matroska file header ID 0x%x\n", id);
2137             /* fall-through */
2138
2139             case EBML_ID_VOID:
2140                 res = ebml_read_skip(matroska);
2141                 break;
2142         }
2143
2144         if (matroska->level_up) {
2145             matroska->level_up--;
2146             break;
2147         }
2148     }
2149
2150     /* Have we found a cluster? */
2151     if (ebml_peek_id(matroska, NULL) == MATROSKA_ID_CLUSTER) {
2152         int i, j;
2153         MatroskaTrack *track;
2154         AVStream *st;
2155
2156         for (i = 0; i < matroska->num_tracks; i++) {
2157             enum CodecID codec_id = CODEC_ID_NONE;
2158             uint8_t *extradata = NULL;
2159             int extradata_size = 0;
2160             int extradata_offset = 0;
2161             track = matroska->tracks[i];
2162             track->stream_index = -1;
2163
2164             /* Apply some sanity checks. */
2165             if (track->codec_id == NULL)
2166                 continue;
2167
2168             for(j=0; ff_mkv_codec_tags[j].id != CODEC_ID_NONE; j++){
2169                 if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
2170                             strlen(ff_mkv_codec_tags[j].str))){
2171                     codec_id= ff_mkv_codec_tags[j].id;
2172                     break;
2173                 }
2174             }
2175
2176             /* Set the FourCC from the CodecID. */
2177             /* This is the MS compatibility mode which stores a
2178              * BITMAPINFOHEADER in the CodecPrivate. */
2179             if (!strcmp(track->codec_id,
2180                         MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2181                 (track->codec_priv_size >= 40) &&
2182                 (track->codec_priv != NULL)) {
2183                 MatroskaVideoTrack *vtrack = (MatroskaVideoTrack *) track;
2184
2185                 /* Offset of biCompression. Stored in LE. */
2186                 vtrack->fourcc = AV_RL32(track->codec_priv + 16);
2187                 codec_id = codec_get_id(codec_bmp_tags, vtrack->fourcc);
2188
2189             }
2190
2191             /* This is the MS compatibility mode which stores a
2192              * WAVEFORMATEX in the CodecPrivate. */
2193             else if (!strcmp(track->codec_id,
2194                              MATROSKA_CODEC_ID_AUDIO_ACM) &&
2195                 (track->codec_priv_size >= 18) &&
2196                 (track->codec_priv != NULL)) {
2197                 uint16_t tag;
2198
2199                 /* Offset of wFormatTag. Stored in LE. */
2200                 tag = AV_RL16(track->codec_priv);
2201                 codec_id = codec_get_id(codec_wav_tags, tag);
2202
2203             }
2204
2205             else if (codec_id == CODEC_ID_AAC && !track->codec_priv_size) {
2206                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *) track;
2207                 int profile = matroska_aac_profile(track->codec_id);
2208                 int sri = matroska_aac_sri(audiotrack->internal_samplerate);
2209                 extradata = av_malloc(5);
2210                 if (extradata == NULL)
2211                     return AVERROR(ENOMEM);
2212                 extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
2213                 extradata[1] = ((sri&0x01) << 7) | (audiotrack->channels<<3);
2214                 if (strstr(track->codec_id, "SBR")) {
2215                     sri = matroska_aac_sri(audiotrack->samplerate);
2216                     extradata[2] = 0x56;
2217                     extradata[3] = 0xE5;
2218                     extradata[4] = 0x80 | (sri<<3);
2219                     extradata_size = 5;
2220                 } else {
2221                     extradata_size = 2;
2222                 }
2223             }
2224
2225             else if (codec_id == CODEC_ID_TTA) {
2226                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *) track;
2227                 ByteIOContext b;
2228                 extradata_size = 30;
2229                 extradata = av_mallocz(extradata_size);
2230                 if (extradata == NULL)
2231                     return AVERROR(ENOMEM);
2232                 init_put_byte(&b, extradata, extradata_size, 1,
2233                               NULL, NULL, NULL, NULL);
2234                 put_buffer(&b, "TTA1", 4);
2235                 put_le16(&b, 1);
2236                 put_le16(&b, audiotrack->channels);
2237                 put_le16(&b, audiotrack->bitdepth);
2238                 put_le32(&b, audiotrack->samplerate);
2239                 put_le32(&b, matroska->ctx->duration * audiotrack->samplerate);
2240             }
2241
2242             else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 ||
2243                      codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) {
2244                 extradata_offset = 26;
2245                 track->codec_priv_size -= extradata_offset;
2246             }
2247
2248             else if (codec_id == CODEC_ID_RA_144) {
2249                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2250                 audiotrack->samplerate = 8000;
2251                 audiotrack->channels = 1;
2252             }
2253
2254             else if (codec_id == CODEC_ID_RA_288 ||
2255                      codec_id == CODEC_ID_COOK ||
2256                      codec_id == CODEC_ID_ATRAC3) {
2257                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2258                 ByteIOContext b;
2259
2260                 init_put_byte(&b, track->codec_priv, track->codec_priv_size, 0,
2261                               NULL, NULL, NULL, NULL);
2262                 url_fskip(&b, 24);
2263                 audiotrack->coded_framesize = get_be32(&b);
2264                 url_fskip(&b, 12);
2265                 audiotrack->sub_packet_h    = get_be16(&b);
2266                 audiotrack->frame_size      = get_be16(&b);
2267                 audiotrack->sub_packet_size = get_be16(&b);
2268                 audiotrack->buf = av_malloc(audiotrack->frame_size * audiotrack->sub_packet_h);
2269                 if (codec_id == CODEC_ID_RA_288) {
2270                     audiotrack->block_align = audiotrack->coded_framesize;
2271                     track->codec_priv_size = 0;
2272                 } else {
2273                     audiotrack->block_align = audiotrack->sub_packet_size;
2274                     extradata_offset = 78;
2275                     track->codec_priv_size -= extradata_offset;
2276                 }
2277             }
2278
2279             if (codec_id == CODEC_ID_NONE) {
2280                 av_log(matroska->ctx, AV_LOG_INFO,
2281                        "Unknown/unsupported CodecID %s.\n",
2282                        track->codec_id);
2283             }
2284
2285             track->stream_index = matroska->num_streams;
2286
2287             matroska->num_streams++;
2288             st = av_new_stream(s, track->stream_index);
2289             if (st == NULL)
2290                 return AVERROR(ENOMEM);
2291             av_set_pts_info(st, 64, matroska->time_scale, 1000*1000*1000); /* 64 bit pts in ns */
2292
2293             st->codec->codec_id = codec_id;
2294             st->start_time = 0;
2295             if (strcmp(track->language, "und"))
2296                 strcpy(st->language, track->language);
2297
2298             if (track->flags & MATROSKA_TRACK_DEFAULT)
2299                 st->disposition |= AV_DISPOSITION_DEFAULT;
2300
2301             if (track->default_duration)
2302                 av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
2303                           track->default_duration, 1000000000, 30000);
2304
2305             if(extradata){
2306                 st->codec->extradata = extradata;
2307                 st->codec->extradata_size = extradata_size;
2308             } else if(track->codec_priv && track->codec_priv_size > 0){
2309                 st->codec->extradata = av_malloc(track->codec_priv_size);
2310                 if(st->codec->extradata == NULL)
2311                     return AVERROR(ENOMEM);
2312                 st->codec->extradata_size = track->codec_priv_size;
2313                 memcpy(st->codec->extradata,track->codec_priv+extradata_offset,
2314                        track->codec_priv_size);
2315             }
2316
2317             if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2318                 MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2319
2320                 st->codec->codec_type = CODEC_TYPE_VIDEO;
2321                 st->codec->codec_tag = videotrack->fourcc;
2322                 st->codec->width = videotrack->pixel_width;
2323                 st->codec->height = videotrack->pixel_height;
2324                 if (videotrack->display_width == 0)
2325                     videotrack->display_width= videotrack->pixel_width;
2326                 if (videotrack->display_height == 0)
2327                     videotrack->display_height= videotrack->pixel_height;
2328                 av_reduce(&st->codec->sample_aspect_ratio.num,
2329                           &st->codec->sample_aspect_ratio.den,
2330                           st->codec->height * videotrack->display_width,
2331                           st->codec-> width * videotrack->display_height,
2332                           255);
2333                 st->need_parsing = AVSTREAM_PARSE_HEADERS;
2334             } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2335                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2336
2337                 st->codec->codec_type = CODEC_TYPE_AUDIO;
2338                 st->codec->sample_rate = audiotrack->samplerate;
2339                 st->codec->channels = audiotrack->channels;
2340                 st->codec->block_align = audiotrack->block_align;
2341             } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
2342                 st->codec->codec_type = CODEC_TYPE_SUBTITLE;
2343             }
2344
2345             /* What do we do with private data? E.g. for Vorbis. */
2346         }
2347         res = 0;
2348     }
2349
2350     if (matroska->index_parsed) {
2351         int i, track, stream;
2352         for (i=0; i<matroska->num_indexes; i++) {
2353             MatroskaDemuxIndex *idx = &matroska->index[i];
2354             track = matroska_find_track_by_num(matroska, idx->track);
2355             if (track < 0)  continue;
2356             stream = matroska->tracks[track]->stream_index;
2357             if (stream >= 0 && stream < matroska->ctx->nb_streams)
2358                 av_add_index_entry(matroska->ctx->streams[stream],
2359                                    idx->pos, idx->time/matroska->time_scale,
2360                                    0, 0, AVINDEX_KEYFRAME);
2361         }
2362     }
2363
2364     return res;
2365 }
2366
2367 static int
2368 matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data, int size,
2369                      int64_t pos, uint64_t cluster_time, uint64_t duration,
2370                      int is_keyframe, int is_bframe)
2371 {
2372     int res = 0;
2373     int track;
2374     AVStream *st;
2375     AVPacket *pkt;
2376     uint8_t *origdata = data;
2377     int16_t block_time;
2378     uint32_t *lace_size = NULL;
2379     int n, flags, laces = 0;
2380     uint64_t num;
2381     int stream_index;
2382
2383     /* first byte(s): tracknum */
2384     if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2385         av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
2386         av_free(origdata);
2387         return res;
2388     }
2389     data += n;
2390     size -= n;
2391
2392     /* fetch track from num */
2393     track = matroska_find_track_by_num(matroska, num);
2394     if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2395         av_log(matroska->ctx, AV_LOG_INFO,
2396                "Invalid stream %d or size %u\n", track, size);
2397         av_free(origdata);
2398         return res;
2399     }
2400     stream_index = matroska->tracks[track]->stream_index;
2401     if (stream_index < 0 || stream_index >= matroska->ctx->nb_streams) {
2402         av_free(origdata);
2403         return res;
2404     }
2405     st = matroska->ctx->streams[stream_index];
2406     if (st->discard >= AVDISCARD_ALL) {
2407         av_free(origdata);
2408         return res;
2409     }
2410     if (duration == AV_NOPTS_VALUE)
2411         duration = matroska->tracks[track]->default_duration / matroska->time_scale;
2412
2413     /* block_time (relative to cluster time) */
2414     block_time = AV_RB16(data);
2415     data += 2;
2416     flags = *data++;
2417     size -= 3;
2418     if (is_keyframe == -1)
2419         is_keyframe = flags & 0x80 ? PKT_FLAG_KEY : 0;
2420
2421     if (matroska->skip_to_keyframe) {
2422         if (!is_keyframe || st != matroska->skip_to_stream) {
2423             av_free(origdata);
2424             return res;
2425         }
2426         matroska->skip_to_keyframe = 0;
2427     }
2428
2429     switch ((flags & 0x06) >> 1) {
2430         case 0x0: /* no lacing */
2431             laces = 1;
2432             lace_size = av_mallocz(sizeof(int));
2433             lace_size[0] = size;
2434             break;
2435
2436         case 0x1: /* xiph lacing */
2437         case 0x2: /* fixed-size lacing */
2438         case 0x3: /* EBML lacing */
2439             if (size == 0) {
2440                 res = -1;
2441                 break;
2442             }
2443             laces = (*data) + 1;
2444             data += 1;
2445             size -= 1;
2446             lace_size = av_mallocz(laces * sizeof(int));
2447
2448             switch ((flags & 0x06) >> 1) {
2449                 case 0x1: /* xiph lacing */ {
2450                     uint8_t temp;
2451                     uint32_t total = 0;
2452                     for (n = 0; res == 0 && n < laces - 1; n++) {
2453                         while (1) {
2454                             if (size == 0) {
2455                                 res = -1;
2456                                 break;
2457                             }
2458                             temp = *data;
2459                             lace_size[n] += temp;
2460                             data += 1;
2461                             size -= 1;
2462                             if (temp != 0xff)
2463                                 break;
2464                         }
2465                         total += lace_size[n];
2466                     }
2467                     lace_size[n] = size - total;
2468                     break;
2469                 }
2470
2471                 case 0x2: /* fixed-size lacing */
2472                     for (n = 0; n < laces; n++)
2473                         lace_size[n] = size / laces;
2474                     break;
2475
2476                 case 0x3: /* EBML lacing */ {
2477                     uint32_t total;
2478                     n = matroska_ebmlnum_uint(data, size, &num);
2479                     if (n < 0) {
2480                         av_log(matroska->ctx, AV_LOG_INFO,
2481                                "EBML block data error\n");
2482                         break;
2483                     }
2484                     data += n;
2485                     size -= n;
2486                     total = lace_size[0] = num;
2487                     for (n = 1; res == 0 && n < laces - 1; n++) {
2488                         int64_t snum;
2489                         int r;
2490                         r = matroska_ebmlnum_sint (data, size, &snum);
2491                         if (r < 0) {
2492                             av_log(matroska->ctx, AV_LOG_INFO,
2493                                    "EBML block data error\n");
2494                             break;
2495                         }
2496                         data += r;
2497                         size -= r;
2498                         lace_size[n] = lace_size[n - 1] + snum;
2499                         total += lace_size[n];
2500                     }
2501                     lace_size[n] = size - total;
2502                     break;
2503                 }
2504             }
2505             break;
2506     }
2507
2508     if (res == 0) {
2509         uint64_t timecode = AV_NOPTS_VALUE;
2510
2511         if (cluster_time != (uint64_t)-1
2512             && (block_time >= 0 || cluster_time >= -block_time))
2513             timecode = cluster_time + block_time;
2514
2515         for (n = 0; n < laces; n++) {
2516             if (st->codec->codec_id == CODEC_ID_RA_288 ||
2517                 st->codec->codec_id == CODEC_ID_COOK ||
2518                 st->codec->codec_id == CODEC_ID_ATRAC3) {
2519                 MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)matroska->tracks[track];
2520                 int a = st->codec->block_align;
2521                 int sps = audiotrack->sub_packet_size;
2522                 int cfs = audiotrack->coded_framesize;
2523                 int h = audiotrack->sub_packet_h;
2524                 int y = audiotrack->sub_packet_cnt;
2525                 int w = audiotrack->frame_size;
2526                 int x;
2527
2528                 if (!audiotrack->pkt_cnt) {
2529                     if (st->codec->codec_id == CODEC_ID_RA_288)
2530                         for (x=0; x<h/2; x++)
2531                             memcpy(audiotrack->buf+x*2*w+y*cfs,
2532                                    data+x*cfs, cfs);
2533                     else
2534                         for (x=0; x<w/sps; x++)
2535                             memcpy(audiotrack->buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);
2536
2537                     if (++audiotrack->sub_packet_cnt >= h) {
2538                         audiotrack->sub_packet_cnt = 0;
2539                         audiotrack->pkt_cnt = h*w / a;
2540                     }
2541                 }
2542                 while (audiotrack->pkt_cnt) {
2543                     pkt = av_mallocz(sizeof(AVPacket));
2544                     av_new_packet(pkt, a);
2545                     memcpy(pkt->data, audiotrack->buf
2546                            + a * (h*w / a - audiotrack->pkt_cnt--), a);
2547                     pkt->pos = pos;
2548                     pkt->stream_index = stream_index;
2549                     matroska_queue_packet(matroska, pkt);
2550                 }
2551             } else {
2552                 int offset = 0;
2553
2554                 pkt = av_mallocz(sizeof(AVPacket));
2555                 /* XXX: prevent data copy... */
2556                 if (av_new_packet(pkt, lace_size[n]-offset) < 0) {
2557                     res = AVERROR(ENOMEM);
2558                     n = laces-1;
2559                     break;
2560                 }
2561                 memcpy (pkt->data, data+offset, lace_size[n]-offset);
2562
2563                 if (n == 0)
2564                     pkt->flags = is_keyframe;
2565                 pkt->stream_index = stream_index;
2566
2567                 pkt->pts = timecode;
2568                 pkt->pos = pos;
2569                 pkt->duration = duration;
2570
2571                 matroska_queue_packet(matroska, pkt);
2572             }
2573
2574             if (timecode != AV_NOPTS_VALUE)
2575                 timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
2576             data += lace_size[n];
2577         }
2578     }
2579
2580     av_free(lace_size);
2581     av_free(origdata);
2582     return res;
2583 }
2584
2585 static int
2586 matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2587                            uint64_t              cluster_time)
2588 {
2589     int res = 0;
2590     uint32_t id;
2591     int is_bframe = 0;
2592     int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2593     uint64_t duration = AV_NOPTS_VALUE;
2594     uint8_t *data;
2595     int size = 0;
2596     int64_t pos = 0;
2597
2598     av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2599
2600     while (res == 0) {
2601         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2602             res = AVERROR(EIO);
2603             break;
2604         } else if (matroska->level_up) {
2605             matroska->level_up--;
2606             break;
2607         }
2608
2609         switch (id) {
2610             /* one block inside the group. Note, block parsing is one
2611              * of the harder things, so this code is a bit complicated.
2612              * See http://www.matroska.org/ for documentation. */
2613             case MATROSKA_ID_BLOCK: {
2614                 pos = url_ftell(matroska->ctx->pb);
2615                 res = ebml_read_binary(matroska, &id, &data, &size);
2616                 break;
2617             }
2618
2619             case MATROSKA_ID_BLOCKDURATION: {
2620                 if ((res = ebml_read_uint(matroska, &id, &duration)) < 0)
2621                     break;
2622                 break;
2623             }
2624
2625             case MATROSKA_ID_BLOCKREFERENCE: {
2626                 int64_t num;
2627                 /* We've found a reference, so not even the first frame in
2628                  * the lace is a key frame. */
2629                 is_keyframe = 0;
2630                 if (last_num_packets != matroska->num_packets)
2631                     matroska->packets[last_num_packets]->flags = 0;
2632                 if ((res = ebml_read_sint(matroska, &id, &num)) < 0)
2633                     break;
2634                 if (num > 0)
2635                     is_bframe = 1;
2636                 break;
2637             }
2638
2639             default:
2640                 av_log(matroska->ctx, AV_LOG_INFO,
2641                        "Unknown entry 0x%x in blockgroup data\n", id);
2642                 /* fall-through */
2643
2644             case EBML_ID_VOID:
2645                 res = ebml_read_skip(matroska);
2646                 break;
2647         }
2648
2649         if (matroska->level_up) {
2650             matroska->level_up--;
2651             break;
2652         }
2653     }
2654
2655     if (res)
2656         return res;
2657
2658     if (size > 0)
2659         res = matroska_parse_block(matroska, data, size, pos, cluster_time,
2660                                    duration, is_keyframe, is_bframe);
2661
2662     return res;
2663 }
2664
2665 static int
2666 matroska_parse_cluster (MatroskaDemuxContext *matroska)
2667 {
2668     int res = 0;
2669     uint32_t id;
2670     uint64_t cluster_time = 0;
2671     uint8_t *data;
2672     int64_t pos;
2673     int size;
2674
2675     av_log(matroska->ctx, AV_LOG_DEBUG,
2676            "parsing cluster at %"PRId64"\n", url_ftell(matroska->ctx->pb));
2677
2678     while (res == 0) {
2679         if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2680             res = AVERROR(EIO);
2681             break;
2682         } else if (matroska->level_up) {
2683             matroska->level_up--;
2684             break;
2685         }
2686
2687         switch (id) {
2688             /* cluster timecode */
2689             case MATROSKA_ID_CLUSTERTIMECODE: {
2690                 uint64_t num;
2691                 if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2692                     break;
2693                 cluster_time = num;
2694                 break;
2695             }
2696
2697                 /* a group of blocks inside a cluster */
2698             case MATROSKA_ID_BLOCKGROUP:
2699                 if ((res = ebml_read_master(matroska, &id)) < 0)
2700                     break;
2701                 res = matroska_parse_blockgroup(matroska, cluster_time);
2702                 break;
2703
2704             case MATROSKA_ID_SIMPLEBLOCK:
2705                 pos = url_ftell(matroska->ctx->pb);
2706                 res = ebml_read_binary(matroska, &id, &data, &size);
2707                 if (res == 0)
2708                     res = matroska_parse_block(matroska, data, size, pos,
2709                                                cluster_time, AV_NOPTS_VALUE,
2710                                                -1, 0);
2711                 break;
2712
2713             default:
2714                 av_log(matroska->ctx, AV_LOG_INFO,
2715                        "Unknown entry 0x%x in cluster data\n", id);
2716                 /* fall-through */
2717
2718             case EBML_ID_VOID:
2719                 res = ebml_read_skip(matroska);
2720                 break;
2721         }
2722
2723         if (matroska->level_up) {
2724             matroska->level_up--;
2725             break;
2726         }
2727     }
2728
2729     return res;
2730 }
2731
2732 static int
2733 matroska_read_packet (AVFormatContext *s,
2734                       AVPacket        *pkt)
2735 {
2736     MatroskaDemuxContext *matroska = s->priv_data;
2737     int res;
2738     uint32_t id;
2739
2740     /* Read stream until we have a packet queued. */
2741     while (matroska_deliver_packet(matroska, pkt)) {
2742
2743         /* Have we already reached the end? */
2744         if (matroska->done)
2745             return AVERROR(EIO);
2746
2747         res = 0;
2748         while (res == 0) {
2749             if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2750                 return AVERROR(EIO);
2751             } else if (matroska->level_up) {
2752                 matroska->level_up--;
2753                 break;
2754             }
2755
2756             switch (id) {
2757                 case MATROSKA_ID_CLUSTER:
2758                     if ((res = ebml_read_master(matroska, &id)) < 0)
2759                         break;
2760                     if ((res = matroska_parse_cluster(matroska)) == 0)
2761                         res = 1; /* Parsed one cluster, let's get out. */
2762                     break;
2763
2764                 default:
2765                 case EBML_ID_VOID:
2766                     res = ebml_read_skip(matroska);
2767                     break;
2768             }
2769
2770             if (matroska->level_up) {
2771                 matroska->level_up--;
2772                 break;
2773             }
2774         }
2775
2776         if (res == -1)
2777             matroska->done = 1;
2778     }
2779
2780     return 0;
2781 }
2782
2783 static int
2784 matroska_read_seek (AVFormatContext *s, int stream_index, int64_t timestamp,
2785                     int flags)
2786 {
2787     MatroskaDemuxContext *matroska = s->priv_data;
2788     AVStream *st = s->streams[stream_index];
2789     int index;
2790
2791     /* find index entry */
2792     index = av_index_search_timestamp(st, timestamp, flags);
2793     if (index < 0)
2794         return 0;
2795
2796     matroska_clear_queue(matroska);
2797
2798     /* do the seek */
2799     url_fseek(s->pb, st->index_entries[index].pos, SEEK_SET);
2800     matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
2801     matroska->skip_to_stream = st;
2802     matroska->peek_id = 0;
2803     return 0;
2804 }
2805
2806 static int
2807 matroska_read_close (AVFormatContext *s)
2808 {
2809     MatroskaDemuxContext *matroska = s->priv_data;
2810     int n = 0;
2811
2812     av_free(matroska->writing_app);
2813     av_free(matroska->muxing_app);
2814     av_free(matroska->index);
2815
2816     matroska_clear_queue(matroska);
2817
2818     for (n = 0; n < matroska->num_tracks; n++) {
2819         MatroskaTrack *track = matroska->tracks[n];
2820         av_free(track->codec_id);
2821         av_free(track->codec_name);
2822         av_free(track->codec_priv);
2823         av_free(track->name);
2824
2825         if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2826             MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2827             av_free(audiotrack->buf);
2828         }
2829
2830         av_free(track);
2831     }
2832
2833     return 0;
2834 }
2835
2836 AVInputFormat matroska_demuxer = {
2837     "matroska",
2838     "Matroska file format",
2839     sizeof(MatroskaDemuxContext),
2840     matroska_probe,
2841     matroska_read_header,
2842     matroska_read_packet,
2843     matroska_read_close,
2844     matroska_read_seek,
2845 };