]> git.sesse.net Git - ffmpeg/blob - libavformat/matroskadec.c
matroskadec: implement matroska_ebmlnum_uint() using ebml_read_num()
[ffmpeg] / libavformat / matroskadec.c
1 /*
2  * Matroska file demuxer
3  * Copyright (c) 2003-2008 The ffmpeg Project
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file matroskadec.c
24  * Matroska file demuxer
25  * by Ronald Bultje <rbultje@ronald.bitfreak.net>
26  * with a little help from Moritz Bunkus <moritz@bunkus.org>
27  * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
28  * Specs available on the matroska project page: http://www.matroska.org/.
29  */
30
31 #include "avformat.h"
32 /* For codec_get_id(). */
33 #include "riff.h"
34 #include "isom.h"
35 #include "matroska.h"
36 #include "libavcodec/mpeg4audio.h"
37 #include "libavutil/intfloat_readwrite.h"
38 #include "libavutil/avstring.h"
39 #include "libavutil/lzo.h"
40 #ifdef CONFIG_ZLIB
41 #include <zlib.h>
42 #endif
43 #ifdef CONFIG_BZLIB
44 #include <bzlib.h>
45 #endif
46
/* How ebml_parse_elem() handles an element matched by an EbmlSyntax entry. */
47 typedef enum {
48     EBML_NONE,   /* payload is skipped; also the type of the {0} table terminator */
49     EBML_UINT,   /* read with ebml_read_uint() into a uint64_t */
50     EBML_FLOAT,  /* read with ebml_read_float() into a double */
51     EBML_STR,    /* read with ebml_read_ascii() into a char * */
52     EBML_UTF8,   /* handled exactly like EBML_STR by the parser */
53     EBML_BIN,    /* read with ebml_read_binary() into an EbmlBin */
54     EBML_NEST,   /* master element: children are parsed with the def.n table */
55     EBML_PASS,   /* no size is read; the same ID is looked up again in def.n */
56     EBML_STOP,   /* no size is read; an int flag is set to 1 and parsing returns 1 */
57 } EbmlType;
58
/*
 * One row of a parsing table. Tables are arrays of these rows terminated
 * by a row whose id is 0; that terminating row is also what unknown IDs
 * are dispatched to.
 */
59 typedef const struct EbmlSyntax {
60     uint32_t id;            /* element ID this row matches */
61     EbmlType type;          /* how the payload is read */
62     int list_elem_size;     /* if non-zero, the target field is an EbmlList and
                                 * a new zeroed element of this size is appended */
63     int data_offset;        /* byte offset of the target field in the parent struct */
64     union {                 /* default stored by ebml_parse_nest() before parsing */
65         uint64_t    u;
66         double      f;
67         const char *s;
68         const struct EbmlSyntax *n;   /* child table for EBML_NEST / EBML_PASS */
69     } def;
70 } EbmlSyntax;
71
/* Array of same-sized elements, grown one element at a time by
 * ebml_parse_elem() for rows with a non-zero list_elem_size. */
72 typedef struct {
73     int nb_elem;    /* number of elements currently stored in elem */
74     void *elem;     /* element storage, (re)allocated with av_realloc() */
75 } EbmlList;
76
/* Payload of a binary element as filled in by ebml_read_binary(). */
77 typedef struct {
78     int      size;  /* payload length in bytes */
79     uint8_t *data;  /* allocated copy of the payload */
80     int64_t  pos;   /* stream position (url_ftell) where the payload started */
81 } EbmlBin;
82
/* Values of the EBML header, filled through the ebml_header table. */
83 typedef struct {
84     uint64_t version;          /* EBML_ID_EBMLREADVERSION */
85     uint64_t max_size;         /* EBML_ID_EBMLMAXSIZELENGTH */
86     uint64_t id_length;        /* EBML_ID_EBMLMAXIDLENGTH */
87     char    *doctype;          /* EBML_ID_DOCTYPE, allocated string */
88     uint64_t doctype_version;  /* EBML_ID_DOCTYPEREADVERSION */
89 } Ebml;
90
/* Compression settings of one content encoding
 * (filled through matroska_track_encoding_compression). */
91 typedef struct {
92     uint64_t algo;      /* MATROSKA_ID_ENCODINGCOMPALGO; switched on in
                             * matroska_decode_buffer() */
93     EbmlBin  settings;  /* MATROSKA_ID_ENCODINGCOMPSETTINGS */
94 } MatroskaTrackCompression;
95
/* One content encoding of a track (filled through matroska_track_encoding). */
96 typedef struct {
97     uint64_t scope;     /* MATROSKA_ID_ENCODINGSCOPE */
98     uint64_t type;      /* MATROSKA_ID_ENCODINGTYPE */
99     MatroskaTrackCompression compression;
100 } MatroskaTrackEncoding;
101
/* Video-specific track settings, filled through matroska_track_video. */
102 typedef struct {
103     double   frame_rate;      /* MATROSKA_ID_VIDEOFRAMERATE */
104     uint64_t display_width;   /* MATROSKA_ID_VIDEODISPLAYWIDTH */
105     uint64_t display_height;  /* MATROSKA_ID_VIDEODISPLAYHEIGHT */
106     uint64_t pixel_width;     /* MATROSKA_ID_VIDEOPIXELWIDTH */
107     uint64_t pixel_height;    /* MATROSKA_ID_VIDEOPIXELHEIGHT */
108     uint64_t fourcc;          /* MATROSKA_ID_VIDEOCOLORSPACE */
109 } MatroskaTrackVideo;
110
/* Audio-specific track settings. The first four fields are filled through
 * matroska_track_audio; the remaining ones are not set by any table. */
111 typedef struct {
112     double   samplerate;      /* MATROSKA_ID_AUDIOSAMPLINGFREQ */
113     double   out_samplerate;  /* MATROSKA_ID_AUDIOOUTSAMPLINGFREQ */
114     uint64_t bitdepth;        /* MATROSKA_ID_AUDIOBITDEPTH */
115     uint64_t channels;        /* MATROSKA_ID_AUDIOCHANNELS */
116
117     /* real audio header (extracted from extradata) */
118     int      coded_framesize;
119     int      sub_packet_h;
120     int      frame_size;
121     int      sub_packet_size;
122     int      sub_packet_cnt;
123     int      pkt_cnt;
124     uint8_t *buf;
125 } MatroskaTrackAudio;
126
/* One track entry of the file; elements of MatroskaDemuxContext.tracks.
 * All fields except stream are filled through the matroska_track table. */
127 typedef struct {
128     uint64_t num;               /* MATROSKA_ID_TRACKNUMBER; key used by
                                     * matroska_find_track_by_num() */
129     uint64_t type;              /* MATROSKA_ID_TRACKTYPE */
130     char    *codec_id;          /* MATROSKA_ID_CODECID, allocated string */
131     EbmlBin  codec_priv;        /* MATROSKA_ID_CODECPRIVATE */
132     char    *language;          /* MATROSKA_ID_TRACKLANGUAGE, allocated string */
133     double time_scale;          /* MATROSKA_ID_TRACKTIMECODESCALE */
134     uint64_t default_duration;  /* MATROSKA_ID_TRACKDEFAULTDURATION */
135     uint64_t flag_default;      /* MATROSKA_ID_TRACKFLAGDEFAULT */
136     MatroskaTrackVideo video;
137     MatroskaTrackAudio audio;
138     EbmlList encodings;         /* list of MatroskaTrackEncoding */
139
140     AVStream *stream;
141 } MatroskaTrack;
142
/* One attached file, filled through the matroska_attachment table. */
143 typedef struct {
144     char *filename;  /* MATROSKA_ID_FILENAME, allocated string */
145     char *mime;      /* MATROSKA_ID_FILEMIMETYPE, allocated string */
146     EbmlBin bin;     /* MATROSKA_ID_FILEDATA */
147 } MatroskaAttachement;
148
/* One chapter atom, filled through matroska_chapter_entry and
 * matroska_chapter_display. */
149 typedef struct {
150     uint64_t start;  /* MATROSKA_ID_CHAPTERTIMESTART */
151     uint64_t end;    /* MATROSKA_ID_CHAPTERTIMEEND */
152     uint64_t uid;    /* MATROSKA_ID_CHAPTERUID */
153     char    *title;  /* MATROSKA_ID_CHAPSTRING, allocated string */
154 } MatroskaChapter;
155
/* One track position of a cue point, filled through matroska_index_pos. */
156 typedef struct {
157     uint64_t track;  /* MATROSKA_ID_CUETRACK */
158     uint64_t pos;    /* MATROSKA_ID_CUECLUSTERPOSITION */
159 } MatroskaIndexPos;
160
/* One cue point, filled through matroska_index_entry. */
161 typedef struct {
162     uint64_t time;   /* MATROSKA_ID_CUETIME */
163     EbmlList pos;    /* list of MatroskaIndexPos */
164 } MatroskaIndex;
165
/* One seek head entry, filled through matroska_seekhead_entry. */
166 typedef struct {
167     uint64_t id;   /* MATROSKA_ID_SEEKID */
168     uint64_t pos;  /* MATROSKA_ID_SEEKPOSITION; matroska_execute_seekhead()
                        * adds segment_start to it */
169 } MatroskaSeekhead;
170
/* One open master element on the level stack; pushed by ebml_read_master()
 * and popped by ebml_level_end(). */
171 typedef struct {
172     uint64_t start;   /* stream position where the master's payload begins */
173     uint64_t length;  /* payload size of the master element */
174 } MatroskaLevel;
175
/*
 * Demuxer state. time_scale, duration and title are filled through
 * matroska_info; the EbmlList members are filled through the segment
 * tables (tracks, attachments, chapters, index, seekhead).
 */
176 typedef struct {
177     AVFormatContext *ctx;
178
179     /* ebml stuff */
180     int num_levels;                         /* number of used entries in levels */
181     MatroskaLevel levels[EBML_MAX_DEPTH];   /* stack of open master elements */
182     int level_up;
183
184     uint64_t time_scale;
185     double   duration;
186     char    *title;
187     EbmlList tracks;        /* list of MatroskaTrack */
188     EbmlList attachments;   /* list of MatroskaAttachement */
189     EbmlList chapters;      /* list of MatroskaChapter */
190     EbmlList index;         /* list of MatroskaIndex */
191     EbmlList seekhead;      /* list of MatroskaSeekhead */
192
193     /* num_streams is the number of streams that av_new_stream() was called
194      * for ( = that are available to the calling program). */
195     int num_streams;
196
197     /* byte position of the segment inside the stream */
        /* (recorded by ebml_parse_elem() when the segment master is entered) */
198     offset_t segment_start;
199
200     /* The packet queue. */
201     AVPacket **packets;
202     int num_packets;
203
204     int done;
        /* set to 1 through the EBML_STOP row for MATROSKA_ID_CLUSTER in
         * matroska_segment */
205     int has_cluster_id;
206
207     /* What to skip before effectively reading a packet. */
208     int skip_to_keyframe;
209     AVStream *skip_to_stream;
210 } MatroskaDemuxContext;
211
/* One block (group) of a cluster, filled through matroska_blockgroup. */
212 typedef struct {
213     uint64_t duration;   /* MATROSKA_ID_BLOCKDURATION */
214     int64_t  reference;  /* MATROSKA_ID_BLOCKREFERENCE (read as EBML_UINT) */
215     EbmlBin  bin;        /* MATROSKA_ID_BLOCK or MATROSKA_ID_SIMPLEBLOCK payload */
216 } MatroskaBlock;
217
/* Parsed content of one cluster, filled through matroska_cluster. */
218 typedef struct {
219     uint64_t timecode;   /* MATROSKA_ID_CLUSTERTIMECODE */
220     EbmlList blocks;     /* list of MatroskaBlock */
221 } MatroskaCluster;
222
/* Number of elements of a true array (must not be used on a pointer). */
#define ARRAY_SIZE(x)  (sizeof(x)/sizeof(*(x)))
224
/* Children of the EBML header element; values are stored in an Ebml struct.
 * Rows of type EBML_NONE are recognized but their payload is skipped. */
225 static EbmlSyntax ebml_header[] = {
226     { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
227     { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
228     { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
229     { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
230     { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
231     { EBML_ID_EBMLVERSION,            EBML_NONE },
232     { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
233     { EBML_ID_VOID,                   EBML_NONE },
234     { 0 }
235 };
236
/* Top level table: only the EBML header master is accepted. */
237 static EbmlSyntax ebml_syntax[] = {
238     { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
239     { 0 }
240 };
241
/* Children of the segment info element; values are stored directly in
 * MatroskaDemuxContext. The time code scale defaults to 1000000. */
242 static EbmlSyntax matroska_info[] = {
243     { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
244     { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
245     { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
246     { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
247     { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
248     { MATROSKA_ID_DATEUTC,            EBML_NONE },
249     { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
250     { EBML_ID_VOID,                   EBML_NONE },
251     { 0 }
252 };
253
/* Children of a track's video element; values go to MatroskaTrackVideo. */
254 static EbmlSyntax matroska_track_video[] = {
255     { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
256     { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
257     { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
258     { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
259     { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
260     { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
261     { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
262     { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
263     { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
264     { EBML_ID_VOID,                   EBML_NONE },
265     { 0 }
266 };
267
/* Children of a track's audio element; values go to MatroskaTrackAudio.
 * Defaults: sampling frequency 8000.0, one channel. */
268 static EbmlSyntax matroska_track_audio[] = {
269     { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
270     { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
271     { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
272     { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
273     { EBML_ID_VOID,                   EBML_NONE },
274     { 0 }
275 };
276
/* Children of a content compression element; values go to
 * MatroskaTrackCompression. */
277 static EbmlSyntax matroska_track_encoding_compression[] = {
278     { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
279     { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
280     { EBML_ID_VOID,                   EBML_NONE },
281     { 0 }
282 };
283
/* Children of one content encoding; values go to MatroskaTrackEncoding.
 * Defaults: scope 1, type 0. */
284 static EbmlSyntax matroska_track_encoding[] = {
285     { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
286     { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
287     { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
288     { EBML_ID_VOID,                   EBML_NONE },
289     { 0 }
290 };
291
/* Children of the content encodings element: every content encoding is
 * appended as a new MatroskaTrackEncoding to MatroskaTrack.encodings. */
292 static EbmlSyntax matroska_track_encodings[] = {
293     { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
294     { EBML_ID_VOID,                   EBML_NONE },
295     { 0 }
296 };
297
/* Children of one track entry; values go to MatroskaTrack.
 * Defaults: language "eng", time code scale 1.0, default flag 1.
 * The content encodings row uses offset 0, so its child table works on
 * the same MatroskaTrack. */
298 static EbmlSyntax matroska_track[] = {
299     { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
300     { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
301     { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
302     { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
303     { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
304     { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
305     { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
306     { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
307     { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
308     { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
309     { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
310     { MATROSKA_ID_TRACKUID,             EBML_NONE },
311     { MATROSKA_ID_TRACKNAME,            EBML_NONE },
312     { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
313     { MATROSKA_ID_TRACKFLAGFORCED,      EBML_NONE },
314     { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
315     { MATROSKA_ID_CODECNAME,            EBML_NONE },
316     { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
317     { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
318     { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
319     { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
320     { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
321     { EBML_ID_VOID,                     EBML_NONE },
322     { 0 }
323 };
324
/* Children of the tracks element: every track entry is appended as a new
 * MatroskaTrack to MatroskaDemuxContext.tracks. */
325 static EbmlSyntax matroska_tracks[] = {
326     { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
327     { EBML_ID_VOID,                   EBML_NONE },
328     { 0 }
329 };
330
/* Children of one attached file; values go to MatroskaAttachement. */
331 static EbmlSyntax matroska_attachment[] = {
332     { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
333     { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
334     { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
335     { MATROSKA_ID_FILEUID,            EBML_NONE },
336     { EBML_ID_VOID,                   EBML_NONE },
337     { 0 }
338 };
339
/* Children of the attachments element: every attached file is appended as
 * a new MatroskaAttachement to MatroskaDemuxContext.attachments. */
340 static EbmlSyntax matroska_attachments[] = {
341     { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
342     { EBML_ID_VOID,                   EBML_NONE },
343     { 0 }
344 };
345
/* Children of a chapter display element; the string goes to
 * MatroskaChapter.title. */
346 static EbmlSyntax matroska_chapter_display[] = {
347     { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
348     { EBML_ID_VOID,                   EBML_NONE },
349     { 0 }
350 };
351
/* Children of one chapter atom; values go to MatroskaChapter. Start and end
 * default to AV_NOPTS_VALUE. The display row uses offset 0, so its child
 * table works on the same MatroskaChapter. */
352 static EbmlSyntax matroska_chapter_entry[] = {
353     { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
354     { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
355     { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
356     { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
357     { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
358     { EBML_ID_VOID,                   EBML_NONE },
359     { 0 }
360 };
361
/* Children of one edition entry: every chapter atom is appended as a new
 * MatroskaChapter to MatroskaDemuxContext.chapters. */
362 static EbmlSyntax matroska_chapter[] = {
363     { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
364     { MATROSKA_ID_EDITIONUID,         EBML_NONE },
365     { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
366     { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
367     { EBML_ID_VOID,                   EBML_NONE },
368     { 0 }
369 };
370
/* Children of the chapters element: edition entries, parsed in place. */
371 static EbmlSyntax matroska_chapters[] = {
372     { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
373     { EBML_ID_VOID,                   EBML_NONE },
374     { 0 }
375 };
376
/* Children of one cue track position; values go to MatroskaIndexPos. */
377 static EbmlSyntax matroska_index_pos[] = {
378     { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
379     { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
380     { EBML_ID_VOID,                   EBML_NONE },
381     { 0 }
382 };
383
/* Children of one cue point; every track position is appended as a new
 * MatroskaIndexPos to MatroskaIndex.pos. */
384 static EbmlSyntax matroska_index_entry[] = {
385     { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
386     { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
387     { EBML_ID_VOID,                   EBML_NONE },
388     { 0 }
389 };
390
/* Children of the cues element: every point entry is appended as a new
 * MatroskaIndex to MatroskaDemuxContext.index. */
391 static EbmlSyntax matroska_index[] = {
392     { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
393     { EBML_ID_VOID,                   EBML_NONE },
394     { 0 }
395 };
396
/* Children of the tags element: nothing is stored. Void elements are
 * skipped silently; every other child hits the terminator row, is reported
 * as unknown by ebml_parse_id() and skipped. */
397 static EbmlSyntax matroska_tags[] = {
398     { EBML_ID_VOID,                   EBML_NONE },
399     { 0 }
400 };
401
/* Children of one seek entry; values go to MatroskaSeekhead. The position
 * defaults to -1 converted to uint64_t, i.e. all bits set. */
402 static EbmlSyntax matroska_seekhead_entry[] = {
403     { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
404     { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
405     { EBML_ID_VOID,                   EBML_NONE },
406     { 0 }
407 };
408
/* Children of the seek head element: every seek entry is appended as a new
 * MatroskaSeekhead to MatroskaDemuxContext.seekhead. */
409 static EbmlSyntax matroska_seekhead[] = {
410     { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
411     { EBML_ID_VOID,                   EBML_NONE },
412     { 0 }
413 };
414
/* Children of the segment element. All masters are parsed in place on the
 * MatroskaDemuxContext. A cluster ID is an EBML_STOP row: it sets
 * has_cluster_id to 1 and makes the parser return 1 without reading the
 * cluster's size. */
415 static EbmlSyntax matroska_segment[] = {
416     { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
417     { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
418     { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
419     { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
420     { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
421     { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
422     { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
423     { MATROSKA_ID_CLUSTER,        EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) },
424     { EBML_ID_VOID,               EBML_NONE },
425     { 0 }
426 };
427
/* Top level table used after the EBML header: only the segment master. */
428 static EbmlSyntax matroska_segments[] = {
429     { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
430     { 0 }
431 };
432
/* Children of a block group; values go to MatroskaBlock. Block and simple
 * block payloads share the same EbmlBin field. The duration defaults to
 * AV_NOPTS_VALUE. */
433 static EbmlSyntax matroska_blockgroup[] = {
434     { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
435     { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
436     { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
437     { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
438     { EBML_ID_VOID,               EBML_NONE },
439     { 0 }
440 };
441
/* Children of a cluster; values go to MatroskaCluster. A block group and a
 * bare simple block both append a new MatroskaBlock to the blocks list; the
 * simple block is an EBML_PASS row, so its ID is looked up again in
 * matroska_blockgroup and read there as binary data. */
442 static EbmlSyntax matroska_cluster[] = {
443     { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
444     { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
445     { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
446     { EBML_ID_VOID,               EBML_NONE },
447     { 0 }
448 };
449
/* Top level table used while reading packets: only the cluster master. */
450 static EbmlSyntax matroska_clusters[] = {
451     { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
452     { 0 }
453 };
454
455 /*
456  * Return: whether we reached the end of a level in the hierarchy or not
457  */
458 static int ebml_level_end(MatroskaDemuxContext *matroska)
459 {
460     ByteIOContext *pb = matroska->ctx->pb;
461     offset_t pos = url_ftell(pb);
462
463     if (matroska->num_levels > 0) {
464         MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
465         if (pos - level->start >= level->length) {
466             matroska->num_levels--;
467             return 1;
468         }
469     }
470     return 0;
471 }
472
473 /*
474  * Read: an "EBML number", which is defined as a variable-length
475  * array of bytes. The first byte indicates the length by giving a
476  * number of 0-bits followed by a one. The position of the first
477  * "one" bit inside the first byte indicates the length of this
478  * number.
479  * Returns: num. of bytes read. < 0 on error.
480  */
481 static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb,
482                          int max_size, uint64_t *number)
483 {
484     int len_mask = 0x80, read = 1, n = 1;
485     int64_t total = 0;
486
487     /* the first byte tells us the length in bytes - get_byte() can normally
488      * return 0, but since that's not a valid first ebmlID byte, we can
489      * use it safely here to catch EOS. */
490     if (!(total = get_byte(pb))) {
491         /* we might encounter EOS here */
492         if (!url_feof(pb)) {
493             offset_t pos = url_ftell(pb);
494             av_log(matroska->ctx, AV_LOG_ERROR,
495                    "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
496                    pos, pos);
497         }
498         return AVERROR(EIO); /* EOS or actual I/O error */
499     }
500
501     /* get the length of the EBML number */
502     while (read <= max_size && !(total & len_mask)) {
503         read++;
504         len_mask >>= 1;
505     }
506     if (read > max_size) {
507         offset_t pos = url_ftell(pb) - 1;
508         av_log(matroska->ctx, AV_LOG_ERROR,
509                "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
510                (uint8_t) total, pos, pos);
511         return AVERROR_INVALIDDATA;
512     }
513
514     /* read out length */
515     total &= ~len_mask;
516     while (n++ < read)
517         total = (total << 8) | get_byte(pb);
518
519     *number = total;
520
521     return read;
522 }
523
524 /*
525  * Read: the element content data ID.
526  * 0 is success, < 0 is failure.
527  */
528 static int ebml_read_element_id(MatroskaDemuxContext *matroska, uint32_t *id)
529 {
530     int read;
531     uint64_t total;
532
533     /* read out the "EBML number", include tag in ID */
534     if ((read = ebml_read_num(matroska, matroska->ctx->pb, 4, &total)) < 0)
535         return read;
536     *id = total | (1 << (read * 7));
537
538     return 0;
539 }
540
541 /*
542  * Read the next element as an unsigned int.
543  * 0 is success, < 0 is failure.
544  */
545 static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num)
546 {
547     int n = 0;
548
549     if (size < 1 || size > 8)
550         return AVERROR_INVALIDDATA;
551
552     /* big-endian ordening; build up number */
553     *num = 0;
554     while (n++ < size)
555         *num = (*num << 8) | get_byte(pb);
556
557     return 0;
558 }
559
560 /*
561  * Read the next element as a float.
562  * 0 is success, < 0 is failure.
563  */
564 static int ebml_read_float(ByteIOContext *pb, int size, double *num)
565 {
566     if (size == 4) {
567         *num= av_int2flt(get_be32(pb));
568     } else if(size==8){
569         *num= av_int2dbl(get_be64(pb));
570     } else
571         return AVERROR_INVALIDDATA;
572
573     return 0;
574 }
575
576 /*
577  * Read the next element as an ASCII string.
578  * 0 is success, < 0 is failure.
579  */
580 static int ebml_read_ascii(ByteIOContext *pb, int size, char **str)
581 {
582     av_free(*str);
583     /* ebml strings are usually not 0-terminated, so we allocate one
584      * byte more, read the string and NULL-terminate it ourselves. */
585     if (!(*str = av_malloc(size + 1)))
586         return AVERROR(ENOMEM);
587     if (get_buffer(pb, (uint8_t *) *str, size) != size) {
588         av_free(*str);
589         return AVERROR(EIO);
590     }
591     (*str)[size] = '\0';
592
593     return 0;
594 }
595
596 /*
597  * Read the next element as binary data.
598  * 0 is success, < 0 is failure.
599  */
600 static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin)
601 {
602     av_free(bin->data);
603     if (!(bin->data = av_malloc(length)))
604         return AVERROR(ENOMEM);
605
606     bin->size = length;
607     bin->pos  = url_ftell(pb);
608     if (get_buffer(pb, bin->data, length) != length)
609         return AVERROR(EIO);
610
611     return 0;
612 }
613
614 /*
615  * Read the next element, but only the header. The contents
616  * are supposed to be sub-elements which can be read separately.
617  * 0 is success, < 0 is failure.
618  */
619 static int ebml_read_master(MatroskaDemuxContext *matroska, int length)
620 {
621     ByteIOContext *pb = matroska->ctx->pb;
622     MatroskaLevel *level;
623
624     if (matroska->num_levels >= EBML_MAX_DEPTH) {
625         av_log(matroska->ctx, AV_LOG_ERROR,
626                "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
627         return AVERROR(ENOSYS);
628     }
629
630     level = &matroska->levels[matroska->num_levels++];
631     level->start = url_ftell(pb);
632     level->length = length;
633
634     return 0;
635 }
636
637 /*
638  * Read signed/unsigned "EBML" numbers.
639  * Return: number of bytes processed, < 0 on error.
640  */
641 static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
642                                  uint8_t *data, uint32_t size, uint64_t *num)
643 {
644     ByteIOContext pb;
645     init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL);
646     return ebml_read_num(matroska, &pb, 8, num);
647 }
648
649 /*
650  * Same as above, but signed.
651  */
652 static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
653                                  uint8_t *data, uint32_t size, int64_t *num)
654 {
655     uint64_t unum;
656     int res;
657
658     /* read as unsigned number first */
659     if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
660         return res;
661
662     /* make signed (weird way) */
663     if (unum == (uint64_t)-1)
664         *num = INT64_MAX;
665     else
666         *num = unum - ((1LL << ((7 * res) - 1)) - 1);
667
668     return res;
669 }
670
671 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
672                            EbmlSyntax *syntax, void *data);
673
674 static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
675                          uint32_t id, void *data)
676 {
677     int i;
678     for (i=0; syntax[i].id; i++)
679         if (id == syntax[i].id)
680             break;
681     if (!syntax[i].id)
682         av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
683     return ebml_parse_elem(matroska, &syntax[i], data);
684 }
685
686 static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
687                       void *data)
688 {
689     uint32_t id;
690     int res = ebml_read_element_id(matroska, &id);
691     return res < 0 ? res : ebml_parse_id(matroska, syntax, id, data);
692 }
693
694 static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
695                            void *data)
696 {
697     int i, res = 0;
698
699     for (i=0; syntax[i].id; i++)
700         switch (syntax[i].type) {
701         case EBML_UINT:
702             *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
703             break;
704         case EBML_FLOAT:
705             *(double   *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
706             break;
707         case EBML_STR:
708         case EBML_UTF8:
709             *(char    **)((char *)data+syntax[i].data_offset) = av_strdup(syntax[i].def.s);
710             break;
711         }
712
713     while (!res && !ebml_level_end(matroska))
714         res = ebml_parse(matroska, syntax, data);
715
716     return res;
717 }
718
719 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
720                            EbmlSyntax *syntax, void *data)
721 {
722     ByteIOContext *pb = matroska->ctx->pb;
723     uint32_t id = syntax->id;
724     uint64_t length;
725     int res;
726
727     data = (char *)data + syntax->data_offset;
728     if (syntax->list_elem_size) {
729         EbmlList *list = data;
730         list->elem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
731         data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
732         memset(data, 0, syntax->list_elem_size);
733         list->nb_elem++;
734     }
735
736     if (syntax->type != EBML_PASS && syntax->type != EBML_STOP)
737         if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0)
738             return res;
739
740     switch (syntax->type) {
741     case EBML_UINT:  res = ebml_read_uint  (pb, length, data);  break;
742     case EBML_FLOAT: res = ebml_read_float (pb, length, data);  break;
743     case EBML_STR:
744     case EBML_UTF8:  res = ebml_read_ascii (pb, length, data);  break;
745     case EBML_BIN:   res = ebml_read_binary(pb, length, data);  break;
746     case EBML_NEST:  if ((res=ebml_read_master(matroska, length)) < 0)
747                          return res;
748                      if (id == MATROSKA_ID_SEGMENT)
749                          matroska->segment_start = url_ftell(matroska->ctx->pb);
750                      return ebml_parse_nest(matroska, syntax->def.n, data);
751     case EBML_PASS:  return ebml_parse_id(matroska, syntax->def.n, id, data);
752     case EBML_STOP:  *(int *)data = 1;      return 1;
753     default:         url_fskip(pb, length); return 0;
754     }
755     if (res == AVERROR_INVALIDDATA)
756         av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
757     else if (res == AVERROR(EIO))
758         av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
759     return res;
760 }
761
762 static void ebml_free(EbmlSyntax *syntax, void *data)
763 {
764     int i, j;
765     for (i=0; syntax[i].id; i++) {
766         void *data_off = (char *)data + syntax[i].data_offset;
767         switch (syntax[i].type) {
768         case EBML_STR:
769         case EBML_UTF8:  av_freep(data_off);                      break;
770         case EBML_BIN:   av_freep(&((EbmlBin *)data_off)->data);  break;
771         case EBML_NEST:
772             if (syntax[i].list_elem_size) {
773                 EbmlList *list = data_off;
774                 char *ptr = list->elem;
775                 for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
776                     ebml_free(syntax[i].def.n, ptr);
777                 av_free(list->elem);
778             } else
779                 ebml_free(syntax[i].def.n, data_off);
780         default:  break;
781         }
782     }
783 }
784
785
786 /*
787  * Autodetecting...
788  */
789 static int matroska_probe(AVProbeData *p)
790 {
791     uint64_t total = 0;
792     int len_mask = 0x80, size = 1, n = 1;
793     char probe_data[] = "matroska";
794
795     /* ebml header? */
796     if (AV_RB32(p->buf) != EBML_ID_HEADER)
797         return 0;
798
799     /* length of header */
800     total = p->buf[4];
801     while (size <= 8 && !(total & len_mask)) {
802         size++;
803         len_mask >>= 1;
804     }
805     if (size > 8)
806       return 0;
807     total &= (len_mask - 1);
808     while (n < size)
809         total = (total << 8) | p->buf[4 + n++];
810
811     /* does the probe data contain the whole header? */
812     if (p->buf_size < 4 + size + total)
813       return 0;
814
815     /* the header must contain the document type 'matroska'. For now,
816      * we don't parse the whole header but simply check for the
817      * availability of that array of characters inside the header.
818      * Not fully fool-proof, but good enough. */
819     for (n = 4+size; n <= 4+size+total-(sizeof(probe_data)-1); n++)
820         if (!memcmp(p->buf+n, probe_data, sizeof(probe_data)-1))
821             return AVPROBE_SCORE_MAX;
822
823     return 0;
824 }
825
826 static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
827                                                  int num)
828 {
829     MatroskaTrack *tracks = matroska->tracks.elem;
830     int i;
831
832     for (i=0; i < matroska->tracks.nb_elem; i++)
833         if (tracks[i].num == num)
834             return &tracks[i];
835
836     av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
837     return NULL;
838 }
839
840 static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
841                                   MatroskaTrack *track)
842 {
843     MatroskaTrackEncoding *encodings = track->encodings.elem;
844     uint8_t* data = *buf;
845     int isize = *buf_size;
846     uint8_t* pkt_data = NULL;
847     int pkt_size = isize;
848     int result = 0;
849     int olen;
850
851     switch (encodings[0].compression.algo) {
852     case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
853         return encodings[0].compression.settings.size;
854     case MATROSKA_TRACK_ENCODING_COMP_LZO:
855         do {
856             olen = pkt_size *= 3;
857             pkt_data = av_realloc(pkt_data,
858                                   pkt_size+LZO_OUTPUT_PADDING);
859             result = lzo1x_decode(pkt_data, &olen, data, &isize);
860         } while (result==LZO_OUTPUT_FULL && pkt_size<10000000);
861         if (result)
862             goto failed;
863         pkt_size -= olen;
864         break;
865 #ifdef CONFIG_ZLIB
866     case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
867         z_stream zstream = {0};
868         if (inflateInit(&zstream) != Z_OK)
869             return -1;
870         zstream.next_in = data;
871         zstream.avail_in = isize;
872         do {
873             pkt_size *= 3;
874             pkt_data = av_realloc(pkt_data, pkt_size);
875             zstream.avail_out = pkt_size - zstream.total_out;
876             zstream.next_out = pkt_data + zstream.total_out;
877             result = inflate(&zstream, Z_NO_FLUSH);
878         } while (result==Z_OK && pkt_size<10000000);
879         pkt_size = zstream.total_out;
880         inflateEnd(&zstream);
881         if (result != Z_STREAM_END)
882             goto failed;
883         break;
884     }
885 #endif
886 #ifdef CONFIG_BZLIB
887     case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
888         bz_stream bzstream = {0};
889         if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
890             return -1;
891         bzstream.next_in = data;
892         bzstream.avail_in = isize;
893         do {
894             pkt_size *= 3;
895             pkt_data = av_realloc(pkt_data, pkt_size);
896             bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
897             bzstream.next_out = pkt_data + bzstream.total_out_lo32;
898             result = BZ2_bzDecompress(&bzstream);
899         } while (result==BZ_OK && pkt_size<10000000);
900         pkt_size = bzstream.total_out_lo32;
901         BZ2_bzDecompressEnd(&bzstream);
902         if (result != BZ_STREAM_END)
903             goto failed;
904         break;
905     }
906 #endif
907     }
908
909     *buf = pkt_data;
910     *buf_size = pkt_size;
911     return 0;
912  failed:
913     av_free(pkt_data);
914     return -1;
915 }
916
917 static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
918 {
919     EbmlList *seekhead_list = &matroska->seekhead;
920     MatroskaSeekhead *seekhead = seekhead_list->elem;
921     uint32_t level_up = matroska->level_up;
922     offset_t before_pos = url_ftell(matroska->ctx->pb);
923     MatroskaLevel level;
924     int i;
925
926     for (i=0; i<seekhead_list->nb_elem; i++) {
927         offset_t offset = seekhead[i].pos + matroska->segment_start;
928
929         if (seekhead[i].pos <= before_pos
930             || seekhead[i].id == MATROSKA_ID_SEEKHEAD
931             || seekhead[i].id == MATROSKA_ID_CLUSTER)
932             continue;
933
934         /* seek */
935         if (url_fseek(matroska->ctx->pb, offset, SEEK_SET) != offset)
936             continue;
937
938         /* we don't want to lose our seekhead level, so we add
939          * a dummy. This is a crude hack. */
940         if (matroska->num_levels == EBML_MAX_DEPTH) {
941             av_log(matroska->ctx, AV_LOG_INFO,
942                    "Max EBML element depth (%d) reached, "
943                    "cannot parse further.\n", EBML_MAX_DEPTH);
944             break;
945         }
946
947         level.start = 0;
948         level.length = (uint64_t)-1;
949         matroska->levels[matroska->num_levels] = level;
950         matroska->num_levels++;
951
952         ebml_parse(matroska, matroska_segment, matroska);
953
954         /* remove dummy level */
955         while (matroska->num_levels) {
956             uint64_t length = matroska->levels[--matroska->num_levels].length;
957             if (length == (uint64_t)-1)
958                 break;
959         }
960     }
961
962     /* seek back */
963     url_fseek(matroska->ctx->pb, before_pos, SEEK_SET);
964     matroska->level_up = level_up;
965 }
966
/**
 * Derive a 1-based AAC profile number from a codec id string.
 *
 * The names "MAIN", "LC" and "SSR" are searched for, in that order, as
 * substrings of codec_id; the first one found gives 1, 2 or 3.
 *
 * @return 1..3 for a recognised profile, 4 when none of the names occurs
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char *aac_profiles[] = { "MAIN", "LC", "SSR" };
    int idx = 0;

    while (idx < ARRAY_SIZE(aac_profiles) &&
           !strstr(codec_id, aac_profiles[idx]))
        idx++;

    return idx + 1;
}
977
978 static int matroska_aac_sri(int samplerate)
979 {
980     int sri;
981
982     for (sri=0; sri<ARRAY_SIZE(ff_mpeg4audio_sample_rates); sri++)
983         if (ff_mpeg4audio_sample_rates[sri] == samplerate)
984             break;
985     return sri;
986 }
987
988 static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
989 {
990     MatroskaDemuxContext *matroska = s->priv_data;
991     EbmlList *attachements_list = &matroska->attachments;
992     MatroskaAttachement *attachements;
993     EbmlList *chapters_list = &matroska->chapters;
994     MatroskaChapter *chapters;
995     MatroskaTrack *tracks;
996     EbmlList *index_list;
997     MatroskaIndex *index;
998     Ebml ebml = { 0 };
999     AVStream *st;
1000     int i, j;
1001
1002     matroska->ctx = s;
1003
1004     /* First read the EBML header. */
1005     if (ebml_parse(matroska, ebml_syntax, &ebml)
1006         || ebml.version > EBML_VERSION       || ebml.max_size > sizeof(uint64_t)
1007         || ebml.id_length > sizeof(uint32_t) || strcmp(ebml.doctype, "matroska")
1008         || ebml.doctype_version > 2) {
1009         av_log(matroska->ctx, AV_LOG_ERROR,
1010                "EBML header using unsupported features\n"
1011                "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
1012                ebml.version, ebml.doctype, ebml.doctype_version);
1013         return AVERROR_NOFMT;
1014     }
1015     ebml_free(ebml_syntax, &ebml);
1016
1017     /* The next thing is a segment. */
1018     if (ebml_parse(matroska, matroska_segments, matroska) < 0)
1019         return -1;
1020     matroska_execute_seekhead(matroska);
1021
1022     if (matroska->duration)
1023         matroska->ctx->duration = matroska->duration * matroska->time_scale
1024                                   * 1000 / AV_TIME_BASE;
1025     if (matroska->title)
1026         strncpy(matroska->ctx->title, matroska->title,
1027                 sizeof(matroska->ctx->title)-1);
1028
1029     tracks = matroska->tracks.elem;
1030     for (i=0; i < matroska->tracks.nb_elem; i++) {
1031         MatroskaTrack *track = &tracks[i];
1032         enum CodecID codec_id = CODEC_ID_NONE;
1033         EbmlList *encodings_list = &tracks->encodings;
1034         MatroskaTrackEncoding *encodings = encodings_list->elem;
1035         uint8_t *extradata = NULL;
1036         int extradata_size = 0;
1037         int extradata_offset = 0;
1038
1039         /* Apply some sanity checks. */
1040         if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
1041             track->type != MATROSKA_TRACK_TYPE_AUDIO &&
1042             track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
1043             av_log(matroska->ctx, AV_LOG_INFO,
1044                    "Unknown or unsupported track type %"PRIu64"\n",
1045                    track->type);
1046             continue;
1047         }
1048         if (track->codec_id == NULL)
1049             continue;
1050
1051         if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1052             if (!track->default_duration)
1053                 track->default_duration = 1000000000/track->video.frame_rate;
1054             if (!track->video.display_width)
1055                 track->video.display_width = track->video.pixel_width;
1056             if (!track->video.display_height)
1057                 track->video.display_height = track->video.pixel_height;
1058         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1059             if (!track->audio.out_samplerate)
1060                 track->audio.out_samplerate = track->audio.samplerate;
1061         }
1062         if (encodings_list->nb_elem > 1) {
1063             av_log(matroska->ctx, AV_LOG_ERROR,
1064                    "Multiple combined encodings no supported");
1065         } else if (encodings_list->nb_elem == 1) {
1066             if (encodings[0].type ||
1067                 (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP &&
1068 #ifdef CONFIG_ZLIB
1069                  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
1070 #endif
1071 #ifdef CONFIG_BZLIB
1072                  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
1073 #endif
1074                  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) {
1075                 encodings[0].scope = 0;
1076                 av_log(matroska->ctx, AV_LOG_ERROR,
1077                        "Unsupported encoding type");
1078             } else if (track->codec_priv.size && encodings[0].scope&2) {
1079                 uint8_t *codec_priv = track->codec_priv.data;
1080                 int offset = matroska_decode_buffer(&track->codec_priv.data,
1081                                                     &track->codec_priv.size,
1082                                                     track);
1083                 if (offset < 0) {
1084                     track->codec_priv.data = NULL;
1085                     track->codec_priv.size = 0;
1086                     av_log(matroska->ctx, AV_LOG_ERROR,
1087                            "Failed to decode codec private data\n");
1088                 } else if (offset > 0) {
1089                     track->codec_priv.data = av_malloc(track->codec_priv.size + offset);
1090                     memcpy(track->codec_priv.data,
1091                            encodings[0].compression.settings.data, offset);
1092                     memcpy(track->codec_priv.data+offset, codec_priv,
1093                            track->codec_priv.size);
1094                     track->codec_priv.size += offset;
1095                 }
1096                 if (codec_priv != track->codec_priv.data)
1097                     av_free(codec_priv);
1098             }
1099         }
1100
1101         for(j=0; ff_mkv_codec_tags[j].id != CODEC_ID_NONE; j++){
1102             if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
1103                         strlen(ff_mkv_codec_tags[j].str))){
1104                 codec_id= ff_mkv_codec_tags[j].id;
1105                 break;
1106             }
1107         }
1108
1109         st = track->stream = av_new_stream(s, matroska->num_streams++);
1110         if (st == NULL)
1111             return AVERROR(ENOMEM);
1112
1113         if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
1114             && track->codec_priv.size >= 40
1115             && track->codec_priv.data != NULL) {
1116             track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
1117             codec_id = codec_get_id(codec_bmp_tags, track->video.fourcc);
1118         } else if (!strcmp(track->codec_id, "A_MS/ACM")
1119                    && track->codec_priv.size >= 18
1120                    && track->codec_priv.data != NULL) {
1121             uint16_t tag = AV_RL16(track->codec_priv.data);
1122             codec_id = codec_get_id(codec_wav_tags, tag);
1123         } else if (!strcmp(track->codec_id, "V_QUICKTIME")
1124                    && (track->codec_priv.size >= 86)
1125                    && (track->codec_priv.data != NULL)) {
1126             track->video.fourcc = AV_RL32(track->codec_priv.data);
1127             codec_id=codec_get_id(codec_movvideo_tags, track->video.fourcc);
1128         } else if (codec_id == CODEC_ID_AAC && !track->codec_priv.size) {
1129             int profile = matroska_aac_profile(track->codec_id);
1130             int sri = matroska_aac_sri(track->audio.samplerate);
1131             extradata = av_malloc(5);
1132             if (extradata == NULL)
1133                 return AVERROR(ENOMEM);
1134             extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
1135             extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
1136             if (strstr(track->codec_id, "SBR")) {
1137                 sri = matroska_aac_sri(track->audio.out_samplerate);
1138                 extradata[2] = 0x56;
1139                 extradata[3] = 0xE5;
1140                 extradata[4] = 0x80 | (sri<<3);
1141                 extradata_size = 5;
1142             } else
1143                 extradata_size = 2;
1144         } else if (codec_id == CODEC_ID_TTA) {
1145             ByteIOContext b;
1146             extradata_size = 30;
1147             extradata = av_mallocz(extradata_size);
1148             if (extradata == NULL)
1149                 return AVERROR(ENOMEM);
1150             init_put_byte(&b, extradata, extradata_size, 1,
1151                           NULL, NULL, NULL, NULL);
1152             put_buffer(&b, "TTA1", 4);
1153             put_le16(&b, 1);
1154             put_le16(&b, track->audio.channels);
1155             put_le16(&b, track->audio.bitdepth);
1156             put_le32(&b, track->audio.out_samplerate);
1157             put_le32(&b, matroska->ctx->duration * track->audio.out_samplerate);
1158         } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 ||
1159                    codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) {
1160             extradata_offset = 26;
1161             track->codec_priv.size -= extradata_offset;
1162         } else if (codec_id == CODEC_ID_RA_144) {
1163             track->audio.out_samplerate = 8000;
1164             track->audio.channels = 1;
1165         } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK ||
1166                    codec_id == CODEC_ID_ATRAC3) {
1167             ByteIOContext b;
1168
1169             init_put_byte(&b, track->codec_priv.data,track->codec_priv.size,
1170                           0, NULL, NULL, NULL, NULL);
1171             url_fskip(&b, 24);
1172             track->audio.coded_framesize = get_be32(&b);
1173             url_fskip(&b, 12);
1174             track->audio.sub_packet_h    = get_be16(&b);
1175             track->audio.frame_size      = get_be16(&b);
1176             track->audio.sub_packet_size = get_be16(&b);
1177             track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
1178             if (codec_id == CODEC_ID_RA_288) {
1179                 st->codec->block_align = track->audio.coded_framesize;
1180                 track->codec_priv.size = 0;
1181             } else {
1182                 st->codec->block_align = track->audio.sub_packet_size;
1183                 extradata_offset = 78;
1184                 track->codec_priv.size -= extradata_offset;
1185             }
1186         }
1187
1188         if (codec_id == CODEC_ID_NONE)
1189             av_log(matroska->ctx, AV_LOG_INFO,
1190                    "Unknown/unsupported CodecID %s.\n", track->codec_id);
1191
1192         av_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */
1193
1194         st->codec->codec_id = codec_id;
1195         st->start_time = 0;
1196         if (strcmp(track->language, "und"))
1197             av_strlcpy(st->language, track->language, 4);
1198
1199         if (track->flag_default)
1200             st->disposition |= AV_DISPOSITION_DEFAULT;
1201
1202         if (track->default_duration)
1203             av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
1204                       track->default_duration, 1000000000, 30000);
1205
1206         if(extradata){
1207             st->codec->extradata = extradata;
1208             st->codec->extradata_size = extradata_size;
1209         } else if(track->codec_priv.data && track->codec_priv.size > 0){
1210             st->codec->extradata = av_malloc(track->codec_priv.size);
1211             if(st->codec->extradata == NULL)
1212                 return AVERROR(ENOMEM);
1213             st->codec->extradata_size = track->codec_priv.size;
1214             memcpy(st->codec->extradata,
1215                    track->codec_priv.data + extradata_offset,
1216                    track->codec_priv.size);
1217         }
1218
1219         if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1220             st->codec->codec_type = CODEC_TYPE_VIDEO;
1221             st->codec->codec_tag  = track->video.fourcc;
1222             st->codec->width  = track->video.pixel_width;
1223             st->codec->height = track->video.pixel_height;
1224             av_reduce(&st->codec->sample_aspect_ratio.num,
1225                       &st->codec->sample_aspect_ratio.den,
1226                       st->codec->height * track->video.display_width,
1227                       st->codec-> width * track->video.display_height,
1228                       255);
1229             st->need_parsing = AVSTREAM_PARSE_HEADERS;
1230         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1231             st->codec->codec_type = CODEC_TYPE_AUDIO;
1232             st->codec->sample_rate = track->audio.out_samplerate;
1233             st->codec->channels = track->audio.channels;
1234         } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
1235             st->codec->codec_type = CODEC_TYPE_SUBTITLE;
1236         }
1237     }
1238
1239     attachements = attachements_list->elem;
1240     for (j=0; j<attachements_list->nb_elem; j++) {
1241         if (!(attachements[j].filename && attachements[j].mime &&
1242               attachements[j].bin.data && attachements[j].bin.size > 0)) {
1243             av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
1244         } else {
1245             AVStream *st = av_new_stream(s, matroska->num_streams++);
1246             if (st == NULL)
1247                 break;
1248             st->filename          = av_strdup(attachements[j].filename);
1249             st->codec->codec_id = CODEC_ID_NONE;
1250             st->codec->codec_type = CODEC_TYPE_ATTACHMENT;
1251             st->codec->extradata  = av_malloc(attachements[j].bin.size);
1252             if(st->codec->extradata == NULL)
1253                 break;
1254             st->codec->extradata_size = attachements[j].bin.size;
1255             memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);
1256
1257             for (i=0; ff_mkv_mime_tags[i].id != CODEC_ID_NONE; i++) {
1258                 if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
1259                              strlen(ff_mkv_mime_tags[i].str))) {
1260                     st->codec->codec_id = ff_mkv_mime_tags[i].id;
1261                     break;
1262                 }
1263             }
1264         }
1265     }
1266
1267     chapters = chapters_list->elem;
1268     for (i=0; i<chapters_list->nb_elem; i++)
1269         if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid)
1270             ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
1271                            chapters[i].start, chapters[i].end,
1272                            chapters[i].title);
1273
1274     index_list = &matroska->index;
1275     index = index_list->elem;
1276     for (i=0; i<index_list->nb_elem; i++) {
1277         EbmlList *pos_list = &index[i].pos;
1278         MatroskaIndexPos *pos = pos_list->elem;
1279         for (j=0; j<pos_list->nb_elem; j++) {
1280             MatroskaTrack *track = matroska_find_track_by_num(matroska,
1281                                                               pos[j].track);
1282             if (track && track->stream)
1283                 av_add_index_entry(track->stream,
1284                                    pos[j].pos + matroska->segment_start,
1285                                    index[i].time*matroska->time_scale/AV_TIME_BASE,
1286                                    0, 0, AVINDEX_KEYFRAME);
1287         }
1288     }
1289
1290     return 0;
1291 }
1292
1293 /*
1294  * Put a packet into our internal queue. Will be delivered to the
1295  * user/application during the next get_packet() call.
1296  */
1297 static void matroska_queue_packet(MatroskaDemuxContext *matroska, AVPacket *pkt)
1298 {
1299     matroska->packets =
1300         av_realloc(matroska->packets, (matroska->num_packets + 1) *
1301                    sizeof(AVPacket *));
1302     matroska->packets[matroska->num_packets] = pkt;
1303     matroska->num_packets++;
1304 }
1305
1306 /*
1307  * Put one packet in an application-supplied AVPacket struct.
1308  * Returns 0 on success or -1 on failure.
1309  */
1310 static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
1311                                    AVPacket *pkt)
1312 {
1313     if (matroska->num_packets > 0) {
1314         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
1315         av_free(matroska->packets[0]);
1316         if (matroska->num_packets > 1) {
1317             memmove(&matroska->packets[0], &matroska->packets[1],
1318                     (matroska->num_packets - 1) * sizeof(AVPacket *));
1319             matroska->packets =
1320                 av_realloc(matroska->packets, (matroska->num_packets - 1) *
1321                            sizeof(AVPacket *));
1322         } else {
1323             av_freep(&matroska->packets);
1324         }
1325         matroska->num_packets--;
1326         return 0;
1327     }
1328
1329     return -1;
1330 }
1331
1332 /*
1333  * Free all packets in our internal queue.
1334  */
1335 static void matroska_clear_queue(MatroskaDemuxContext *matroska)
1336 {
1337     if (matroska->packets) {
1338         int n;
1339         for (n = 0; n < matroska->num_packets; n++) {
1340             av_free_packet(matroska->packets[n]);
1341             av_free(matroska->packets[n]);
1342         }
1343         av_free(matroska->packets);
1344         matroska->packets = NULL;
1345         matroska->num_packets = 0;
1346     }
1347 }
1348
1349 static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
1350                                 int size, int64_t pos, uint64_t cluster_time,
1351                                 uint64_t duration, int is_keyframe)
1352 {
1353     MatroskaTrack *track;
1354     int res = 0;
1355     AVStream *st;
1356     AVPacket *pkt;
1357     int16_t block_time;
1358     uint32_t *lace_size = NULL;
1359     int n, flags, laces = 0;
1360     uint64_t num;
1361
1362     if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
1363         av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
1364         return res;
1365     }
1366     data += n;
1367     size -= n;
1368
1369     track = matroska_find_track_by_num(matroska, num);
1370     if (size <= 3 || !track || !track->stream) {
1371         av_log(matroska->ctx, AV_LOG_INFO,
1372                "Invalid stream %"PRIu64" or size %u\n", num, size);
1373         return res;
1374     }
1375     st = track->stream;
1376     if (st->discard >= AVDISCARD_ALL)
1377         return res;
1378     if (duration == AV_NOPTS_VALUE)
1379         duration = track->default_duration / matroska->time_scale;
1380
1381     block_time = AV_RB16(data);
1382     data += 2;
1383     flags = *data++;
1384     size -= 3;
1385     if (is_keyframe == -1)
1386         is_keyframe = flags & 0x80 ? PKT_FLAG_KEY : 0;
1387
1388     if (matroska->skip_to_keyframe) {
1389         if (!is_keyframe || st != matroska->skip_to_stream)
1390             return res;
1391         matroska->skip_to_keyframe = 0;
1392     }
1393
1394     switch ((flags & 0x06) >> 1) {
1395         case 0x0: /* no lacing */
1396             laces = 1;
1397             lace_size = av_mallocz(sizeof(int));
1398             lace_size[0] = size;
1399             break;
1400
1401         case 0x1: /* xiph lacing */
1402         case 0x2: /* fixed-size lacing */
1403         case 0x3: /* EBML lacing */
1404             assert(size>0); // size <=3 is checked before size-=3 above
1405             laces = (*data) + 1;
1406             data += 1;
1407             size -= 1;
1408             lace_size = av_mallocz(laces * sizeof(int));
1409
1410             switch ((flags & 0x06) >> 1) {
1411                 case 0x1: /* xiph lacing */ {
1412                     uint8_t temp;
1413                     uint32_t total = 0;
1414                     for (n = 0; res == 0 && n < laces - 1; n++) {
1415                         while (1) {
1416                             if (size == 0) {
1417                                 res = -1;
1418                                 break;
1419                             }
1420                             temp = *data;
1421                             lace_size[n] += temp;
1422                             data += 1;
1423                             size -= 1;
1424                             if (temp != 0xff)
1425                                 break;
1426                         }
1427                         total += lace_size[n];
1428                     }
1429                     lace_size[n] = size - total;
1430                     break;
1431                 }
1432
1433                 case 0x2: /* fixed-size lacing */
1434                     for (n = 0; n < laces; n++)
1435                         lace_size[n] = size / laces;
1436                     break;
1437
1438                 case 0x3: /* EBML lacing */ {
1439                     uint32_t total;
1440                     n = matroska_ebmlnum_uint(matroska, data, size, &num);
1441                     if (n < 0) {
1442                         av_log(matroska->ctx, AV_LOG_INFO,
1443                                "EBML block data error\n");
1444                         break;
1445                     }
1446                     data += n;
1447                     size -= n;
1448                     total = lace_size[0] = num;
1449                     for (n = 1; res == 0 && n < laces - 1; n++) {
1450                         int64_t snum;
1451                         int r;
1452                         r = matroska_ebmlnum_sint(matroska, data, size, &snum);
1453                         if (r < 0) {
1454                             av_log(matroska->ctx, AV_LOG_INFO,
1455                                    "EBML block data error\n");
1456                             break;
1457                         }
1458                         data += r;
1459                         size -= r;
1460                         lace_size[n] = lace_size[n - 1] + snum;
1461                         total += lace_size[n];
1462                     }
1463                     lace_size[n] = size - total;
1464                     break;
1465                 }
1466             }
1467             break;
1468     }
1469
1470     if (res == 0) {
1471         uint64_t timecode = AV_NOPTS_VALUE;
1472
1473         if (cluster_time != (uint64_t)-1
1474             && (block_time >= 0 || cluster_time >= -block_time))
1475             timecode = cluster_time + block_time;
1476
1477         for (n = 0; n < laces; n++) {
1478             if (st->codec->codec_id == CODEC_ID_RA_288 ||
1479                 st->codec->codec_id == CODEC_ID_COOK ||
1480                 st->codec->codec_id == CODEC_ID_ATRAC3) {
1481                 int a = st->codec->block_align;
1482                 int sps = track->audio.sub_packet_size;
1483                 int cfs = track->audio.coded_framesize;
1484                 int h = track->audio.sub_packet_h;
1485                 int y = track->audio.sub_packet_cnt;
1486                 int w = track->audio.frame_size;
1487                 int x;
1488
1489                 if (!track->audio.pkt_cnt) {
1490                     if (st->codec->codec_id == CODEC_ID_RA_288)
1491                         for (x=0; x<h/2; x++)
1492                             memcpy(track->audio.buf+x*2*w+y*cfs,
1493                                    data+x*cfs, cfs);
1494                     else
1495                         for (x=0; x<w/sps; x++)
1496                             memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);
1497
1498                     if (++track->audio.sub_packet_cnt >= h) {
1499                         track->audio.sub_packet_cnt = 0;
1500                         track->audio.pkt_cnt = h*w / a;
1501                     }
1502                 }
1503                 while (track->audio.pkt_cnt) {
1504                     pkt = av_mallocz(sizeof(AVPacket));
1505                     av_new_packet(pkt, a);
1506                     memcpy(pkt->data, track->audio.buf
1507                            + a * (h*w / a - track->audio.pkt_cnt--), a);
1508                     pkt->pos = pos;
1509                     pkt->stream_index = st->index;
1510                     matroska_queue_packet(matroska, pkt);
1511                 }
1512             } else {
1513                 MatroskaTrackEncoding *encodings = track->encodings.elem;
1514                 int offset = 0, pkt_size = lace_size[n];
1515                 uint8_t *pkt_data = data;
1516
1517                 if (encodings && encodings->scope & 1) {
1518                     offset = matroska_decode_buffer(&pkt_data,&pkt_size, track);
1519                     if (offset < 0)
1520                         continue;
1521                 }
1522
1523                 pkt = av_mallocz(sizeof(AVPacket));
1524                 /* XXX: prevent data copy... */
1525                 if (av_new_packet(pkt, pkt_size+offset) < 0) {
1526                     av_free(pkt);
1527                     res = AVERROR(ENOMEM);
1528                     n = laces-1;
1529                     break;
1530                 }
1531                 if (offset)
1532                     memcpy (pkt->data, encodings->compression.settings.data, offset);
1533                 memcpy (pkt->data+offset, pkt_data, pkt_size);
1534
1535                 if (pkt_data != data)
1536                     av_free(pkt_data);
1537
1538                 if (n == 0)
1539                     pkt->flags = is_keyframe;
1540                 pkt->stream_index = st->index;
1541
1542                 pkt->pts = timecode;
1543                 pkt->pos = pos;
1544                 pkt->duration = duration;
1545
1546                 matroska_queue_packet(matroska, pkt);
1547             }
1548
1549             if (timecode != AV_NOPTS_VALUE)
1550                 timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
1551             data += lace_size[n];
1552         }
1553     }
1554
1555     av_free(lace_size);
1556     return res;
1557 }
1558
1559 static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
1560 {
1561     MatroskaCluster cluster = { 0 };
1562     EbmlList *blocks_list;
1563     MatroskaBlock *blocks;
1564     int i, res;
1565     if (matroska->has_cluster_id){
1566         /* For the first cluster we parse, it's ID was already read as
1567            part of matroska_read_header(), so don't read it again */
1568         res = ebml_parse_id(matroska, matroska_clusters,
1569                             MATROSKA_ID_CLUSTER, &cluster);
1570         matroska->has_cluster_id = 0;
1571     } else
1572         res = ebml_parse(matroska, matroska_clusters, &cluster);
1573     blocks_list = &cluster.blocks;
1574     blocks = blocks_list->elem;
1575     for (i=0; !res && i<blocks_list->nb_elem; i++)
1576         if (blocks[i].bin.size > 0)
1577             res=matroska_parse_block(matroska,
1578                                      blocks[i].bin.data, blocks[i].bin.size,
1579                                      blocks[i].bin.pos,  cluster.timecode,
1580                                      blocks[i].duration, !blocks[i].reference);
1581     ebml_free(matroska_cluster, &cluster);
1582     return res;
1583 }
1584
1585 static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
1586 {
1587     MatroskaDemuxContext *matroska = s->priv_data;
1588
1589     while (matroska_deliver_packet(matroska, pkt)) {
1590         if (matroska->done)
1591             return AVERROR(EIO);
1592         if (matroska_parse_cluster(matroska) < 0)
1593             matroska->done = 1;
1594     }
1595
1596     return 0;
1597 }
1598
1599 static int matroska_read_seek(AVFormatContext *s, int stream_index,
1600                               int64_t timestamp, int flags)
1601 {
1602     MatroskaDemuxContext *matroska = s->priv_data;
1603     AVStream *st = s->streams[stream_index];
1604     int index;
1605
1606     index = av_index_search_timestamp(st, timestamp, flags);
1607     if (index < 0)
1608         return 0;
1609
1610     matroska_clear_queue(matroska);
1611
1612     url_fseek(s->pb, st->index_entries[index].pos, SEEK_SET);
1613     matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
1614     matroska->skip_to_stream = st;
1615     av_update_cur_dts(s, st, st->index_entries[index].timestamp);
1616     return 0;
1617 }
1618
1619 static int matroska_read_close(AVFormatContext *s)
1620 {
1621     MatroskaDemuxContext *matroska = s->priv_data;
1622     MatroskaTrack *tracks = matroska->tracks.elem;
1623     int n;
1624
1625     matroska_clear_queue(matroska);
1626
1627     for (n=0; n < matroska->tracks.nb_elem; n++)
1628         if (tracks[n].type == MATROSKA_TRACK_TYPE_AUDIO)
1629             av_free(tracks[n].audio.buf);
1630     ebml_free(matroska_segment, matroska);
1631
1632     return 0;
1633 }
1634
/*
 * Demuxer description for the Matroska format. The fields are initialized
 * positionally, so their order must follow the AVInputFormat declaration
 * (NOTE(review): that declaration is not in this file; field roles below
 * are read off the values - confirm against avformat.h).
 */
AVInputFormat matroska_demuxer = {
    "matroska",                                   /* presumably the short format name */
    NULL_IF_CONFIG_SMALL("Matroska file format"), /* presumably the descriptive name */
    sizeof(MatroskaDemuxContext),                 /* size of the demuxer context used as s->priv_data */
    matroska_probe,                               /* probe function */
    matroska_read_header,                         /* reads the header and creates the streams */
    matroska_read_packet,                         /* returns the next packet */
    matroska_read_close,                          /* frees the demuxer state */
    matroska_read_seek,                           /* index based seeking */
};