]> git.sesse.net Git - ffmpeg/blob - libavformat/oggenc.c
parse stream headers for audio streams in mkv, needed for frame size
[ffmpeg] / libavformat / oggenc.c
1 /*
2  * Ogg muxer
3  * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at free dot fr>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 #include "libavutil/crc.h"
23 #include "libavcodec/xiph.h"
24 #include "libavcodec/bytestream.h"
25 #include "libavcodec/flac.h"
26 #include "avformat.h"
27 #include "internal.h"
28 #include "vorbiscomment.h"
29
/**
 * Per-stream muxer state, kept in AVStream.priv_data.
 */
typedef struct {
    /** granule position computed for the most recently written packet */
    int64_t duration;
    /** sequence number stored in the next page header, then incremented */
    unsigned page_counter;
    /** codec header packets; emitted in index order before any data page */
    uint8_t *header[3];
    /** byte length of each header[] entry; a zero length means "skip" */
    int header_len[3];

    /* --- Theora granule bookkeeping --- */

    /** number of low granule bits holding the frame count since the last
        keyframe; parsed from the first Theora header */
    int kfgshift;
    /** pts recorded at the last keyframe (or at the last forced reset) */
    int64_t last_kf_pts;
    /** byte 9 of the first Theora header; selects whether the granule is
        built from pts or from pts + duration */
    int vrev;

    /** set by the interleaver once the stream's last buffered packet is
        handed out during flush; makes the page writer add flag 4 */
    int eos;
    /** number of packets of this stream currently buffered for interleaving */
    unsigned packet_count;
} OGGStreamContext;
42
43 static void ogg_update_checksum(AVFormatContext *s, int64_t crc_offset)
44 {
45     int64_t pos = url_ftell(s->pb);
46     uint32_t checksum = get_checksum(s->pb);
47     url_fseek(s->pb, crc_offset, SEEK_SET);
48     put_be32(s->pb, checksum);
49     url_fseek(s->pb, pos, SEEK_SET);
50 }
51
52 static int ogg_write_page(AVFormatContext *s, const uint8_t *data, int size,
53                           int64_t granule, int stream_index, int flags)
54 {
55     OGGStreamContext *oggstream = s->streams[stream_index]->priv_data;
56     int64_t crc_offset;
57     int page_segments, i;
58
59     if (size >= 255*255) {
60         granule = -1;
61         size = 255*255;
62     } else if (oggstream->eos)
63         flags |= 4;
64
65     page_segments = FFMIN(size/255 + 1, 255);
66
67     init_checksum(s->pb, ff_crc04C11DB7_update, 0);
68     put_tag(s->pb, "OggS");
69     put_byte(s->pb, 0);
70     put_byte(s->pb, flags);
71     put_le64(s->pb, granule);
72     put_le32(s->pb, stream_index);
73     put_le32(s->pb, oggstream->page_counter++);
74     crc_offset = url_ftell(s->pb);
75     put_le32(s->pb, 0); // crc
76     put_byte(s->pb, page_segments);
77     for (i = 0; i < page_segments-1; i++)
78         put_byte(s->pb, 255);
79     put_byte(s->pb, size - (page_segments-1)*255);
80     put_buffer(s->pb, data, size);
81
82     ogg_update_checksum(s, crc_offset);
83     put_flush_packet(s->pb);
84     return size;
85 }
86
87 static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact,
88                                         int *header_len, AVMetadata *m)
89 {
90     const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT;
91     int size;
92     uint8_t *p, *p0;
93     unsigned int count;
94
95     size = offset + ff_vorbiscomment_length(m, vendor, &count);
96     p = av_mallocz(size);
97     if (!p)
98         return NULL;
99     p0 = p;
100
101     p += offset;
102     ff_vorbiscomment_write(&p, m, vendor, count);
103
104     *header_len = size;
105     return p0;
106 }
107
108 static int ogg_build_flac_headers(AVCodecContext *avctx,
109                                   OGGStreamContext *oggstream, int bitexact,
110                                   AVMetadata *m)
111 {
112     enum FLACExtradataFormat format;
113     uint8_t *streaminfo;
114     uint8_t *p;
115
116     if (!ff_flac_is_extradata_valid(avctx, &format, &streaminfo))
117         return -1;
118
119     // first packet: STREAMINFO
120     oggstream->header_len[0] = 51;
121     oggstream->header[0] = av_mallocz(51); // per ogg flac specs
122     p = oggstream->header[0];
123     if (!p)
124         return AVERROR(ENOMEM);
125     bytestream_put_byte(&p, 0x7F);
126     bytestream_put_buffer(&p, "FLAC", 4);
127     bytestream_put_byte(&p, 1); // major version
128     bytestream_put_byte(&p, 0); // minor version
129     bytestream_put_be16(&p, 1); // headers packets without this one
130     bytestream_put_buffer(&p, "fLaC", 4);
131     bytestream_put_byte(&p, 0x00); // streaminfo
132     bytestream_put_be24(&p, 34);
133     bytestream_put_buffer(&p, streaminfo, FLAC_STREAMINFO_SIZE);
134
135     // second packet: VorbisComment
136     p = ogg_write_vorbiscomment(4, bitexact, &oggstream->header_len[1], m);
137     if (!p)
138         return AVERROR(ENOMEM);
139     oggstream->header[1] = p;
140     bytestream_put_byte(&p, 0x84); // last metadata block and vorbis comment
141     bytestream_put_be24(&p, oggstream->header_len[1] - 4);
142
143     return 0;
144 }
145
146 #define SPEEX_HEADER_SIZE 80
147
148 static int ogg_build_speex_headers(AVCodecContext *avctx,
149                                    OGGStreamContext *oggstream, int bitexact,
150                                    AVMetadata *m)
151 {
152     uint8_t *p;
153
154     if (avctx->extradata_size < SPEEX_HEADER_SIZE)
155         return -1;
156
157     // first packet: Speex header
158     p = av_mallocz(SPEEX_HEADER_SIZE);
159     if (!p)
160         return AVERROR(ENOMEM);
161     oggstream->header[0] = p;
162     oggstream->header_len[0] = SPEEX_HEADER_SIZE;
163     bytestream_put_buffer(&p, avctx->extradata, SPEEX_HEADER_SIZE);
164     AV_WL32(&oggstream->header[0][68], 0);  // set extra_headers to 0
165
166     // second packet: VorbisComment
167     p = ogg_write_vorbiscomment(0, bitexact, &oggstream->header_len[1], m);
168     if (!p)
169         return AVERROR(ENOMEM);
170     oggstream->header[1] = p;
171
172     return 0;
173 }
174
175 static int ogg_write_header(AVFormatContext *s)
176 {
177     OGGStreamContext *oggstream;
178     int i, j;
179     for (i = 0; i < s->nb_streams; i++) {
180         AVStream *st = s->streams[i];
181         if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
182             av_set_pts_info(st, 64, 1, st->codec->sample_rate);
183         else if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
184             av_set_pts_info(st, 64, st->codec->time_base.num, st->codec->time_base.den);
185         if (st->codec->codec_id != CODEC_ID_VORBIS &&
186             st->codec->codec_id != CODEC_ID_THEORA &&
187             st->codec->codec_id != CODEC_ID_SPEEX  &&
188             st->codec->codec_id != CODEC_ID_FLAC) {
189             av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
190             return -1;
191         }
192
193         if (!st->codec->extradata || !st->codec->extradata_size) {
194             av_log(s, AV_LOG_ERROR, "No extradata present\n");
195             return -1;
196         }
197         oggstream = av_mallocz(sizeof(*oggstream));
198         st->priv_data = oggstream;
199         if (st->codec->codec_id == CODEC_ID_FLAC) {
200             int err = ogg_build_flac_headers(st->codec, oggstream,
201                                              st->codec->flags & CODEC_FLAG_BITEXACT,
202                                              s->metadata);
203             if (err) {
204                 av_log(s, AV_LOG_ERROR, "Error writing FLAC headers\n");
205                 av_freep(&st->priv_data);
206                 return err;
207             }
208         } else if (st->codec->codec_id == CODEC_ID_SPEEX) {
209             int err = ogg_build_speex_headers(st->codec, oggstream,
210                                               st->codec->flags & CODEC_FLAG_BITEXACT,
211                                               s->metadata);
212             if (err) {
213                 av_log(s, AV_LOG_ERROR, "Error writing Speex headers\n");
214                 av_freep(&st->priv_data);
215                 return err;
216             }
217         } else {
218             if (ff_split_xiph_headers(st->codec->extradata, st->codec->extradata_size,
219                                       st->codec->codec_id == CODEC_ID_VORBIS ? 30 : 42,
220                                       oggstream->header, oggstream->header_len) < 0) {
221                 av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
222                 av_freep(&st->priv_data);
223                 return -1;
224             }
225             if (st->codec->codec_id == CODEC_ID_THEORA) {
226                 /** KFGSHIFT is the width of the less significant section of the granule position
227                     The less significant section is the frame count since the last keyframe */
228                 oggstream->kfgshift = ((oggstream->header[0][40]&3)<<3)|(oggstream->header[0][41]>>5);
229                 oggstream->vrev = oggstream->header[0][9];
230                 av_log(s, AV_LOG_DEBUG, "theora kfgshift %d, vrev %d\n",
231                        oggstream->kfgshift, oggstream->vrev);
232             }
233         }
234     }
235     for (i = 0; i < 3; i++) {
236         for (j = 0; j < s->nb_streams; j++) {
237             AVStream *st = s->streams[j];
238             OGGStreamContext *oggstream = st->priv_data;
239             if (oggstream && oggstream->header_len[i]) {
240                 ogg_write_page(s, oggstream->header[i], oggstream->header_len[i],
241                                0, st->index, i ? 0 : 2); // bos
242             }
243         }
244     }
245     return 0;
246 }
247
248 static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
249 {
250     AVStream *st = s->streams[pkt->stream_index];
251     OGGStreamContext *oggstream = st->priv_data;
252     uint8_t *ptr = pkt->data;
253     int ret, size = pkt->size;
254     int64_t granule;
255
256     if (st->codec->codec_id == CODEC_ID_THEORA) {
257         int64_t pts = oggstream->vrev < 1 ? pkt->pts : pkt->pts + pkt->duration;
258         int pframe_count;
259         if (pkt->flags & AV_PKT_FLAG_KEY)
260             oggstream->last_kf_pts = pts;
261         pframe_count = pts - oggstream->last_kf_pts;
262         // prevent frame count from overflow if key frame flag is not set
263         if (pframe_count >= (1<<oggstream->kfgshift)) {
264             oggstream->last_kf_pts += pframe_count;
265             pframe_count = 0;
266         }
267         granule = (oggstream->last_kf_pts<<oggstream->kfgshift) | pframe_count;
268     } else
269         granule = pkt->pts + pkt->duration;
270     oggstream->duration = granule;
271     do {
272         ret = ogg_write_page(s, ptr, size, granule, pkt->stream_index, ptr != pkt->data);
273         ptr  += ret; size -= ret;
274     } while (size > 0 || ret == 255*255); // need to output a last nil page
275
276     return 0;
277 }
278
279 static int ogg_compare_granule(AVFormatContext *s, AVPacket *next, AVPacket *pkt)
280 {
281     AVStream *st2 = s->streams[next->stream_index];
282     AVStream *st  = s->streams[pkt ->stream_index];
283
284     int64_t next_granule = av_rescale_q(next->pts + next->duration,
285                                         st2->time_base, AV_TIME_BASE_Q);
286     int64_t cur_granule  = av_rescale_q(pkt ->pts + pkt ->duration,
287                                         st ->time_base, AV_TIME_BASE_Q);
288     return next_granule > cur_granule;
289 }
290
291 static int ogg_interleave_per_granule(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int flush)
292 {
293     OGGStreamContext *ogg;
294     int i, stream_count = 0;
295     int interleaved = 0;
296
297     if (pkt) {
298         ff_interleave_add_packet(s, pkt, ogg_compare_granule);
299         ogg = s->streams[pkt->stream_index]->priv_data;
300         ogg->packet_count++;
301     }
302
303     for (i = 0; i < s->nb_streams; i++) {
304         ogg = s->streams[i]->priv_data;
305         stream_count += !!ogg->packet_count;
306         interleaved += ogg->packet_count > 1;
307     }
308
309     if ((s->nb_streams == stream_count && interleaved == stream_count) ||
310         (flush && stream_count)) {
311         AVPacketList *pktl= s->packet_buffer;
312         *out= pktl->pkt;
313         s->packet_buffer = pktl->next;
314
315         ogg = s->streams[out->stream_index]->priv_data;
316         if (flush && ogg->packet_count == 1)
317             ogg->eos = 1;
318         ogg->packet_count--;
319
320         if(!s->packet_buffer)
321             s->packet_buffer_end= NULL;
322
323         if(s->streams[out->stream_index]->last_in_packet_buffer == pktl)
324             s->streams[out->stream_index]->last_in_packet_buffer= NULL;
325
326         av_freep(&pktl);
327         return 1;
328     } else {
329         av_init_packet(out);
330         return 0;
331     }
332 }
333
334 static int ogg_write_trailer(AVFormatContext *s)
335 {
336     int i;
337     for (i = 0; i < s->nb_streams; i++) {
338         AVStream *st = s->streams[i];
339         OGGStreamContext *oggstream = st->priv_data;
340         if (st->codec->codec_id == CODEC_ID_FLAC ||
341             st->codec->codec_id == CODEC_ID_SPEEX) {
342             av_free(oggstream->header[0]);
343             av_free(oggstream->header[1]);
344         }
345         av_freep(&st->priv_data);
346     }
347     return 0;
348 }
349
350 AVOutputFormat ogg_muxer = {
351     "ogg",
352     NULL_IF_CONFIG_SMALL("Ogg"),
353     "application/ogg",
354     "ogg,ogv,spx",
355     0,
356     CODEC_ID_FLAC,
357     CODEC_ID_THEORA,
358     ogg_write_header,
359     ogg_write_packet,
360     ogg_write_trailer,
361     .interleave_packet = ogg_interleave_per_granule,
362     .metadata_conv = ff_vorbiscomment_metadata_conv,
363 };