2 * Audio and Video frame extraction
3 * Copyright (c) 2003 Fabrice Bellard.
4 * Copyright (c) 2003 Michael Niedermayer.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "mpegvideo.h"
22 #include "mpegaudio.h"
/* Head of the linked list of registered parsers. av_register_codec_parser()
   pushes new entries at the front and av_parser_init() walks the list. */
24 AVCodecParser *av_first_parser = NULL;
/**
 * Register a parser by pushing it onto the front of the global
 * av_first_parser list.
 * @param parser descriptor to register; its next field is overwritten
 */
26 void av_register_codec_parser(AVCodecParser *parser)
28 parser->next = av_first_parser;
29 av_first_parser = parser;
/**
 * Create a parser context for a codec.
 * Walks the av_first_parser list for a descriptor that lists codec_id in one
 * of its five codec_ids slots, zero-allocates an AVCodecParserContext and
 * parser->priv_data_size bytes of private data, then calls the optional
 * parser_init callback.
 * @param codec_id codec to look up; CODEC_ID_NONE is tested before the search
 * NOTE(review): the return statements, allocation checks and error paths are
 * not visible in this view; presumably NULL is returned on failure -- confirm
 * against the complete function.
 */
32 AVCodecParserContext *av_parser_init(int codec_id)
34 AVCodecParserContext *s;
35 AVCodecParser *parser;
38 if(codec_id == CODEC_ID_NONE)
/* every descriptor is compared against all five codec id slots */
41 for(parser = av_first_parser; parser != NULL; parser = parser->next) {
42 if (parser->codec_ids[0] == codec_id ||
43 parser->codec_ids[1] == codec_id ||
44 parser->codec_ids[2] == codec_id ||
45 parser->codec_ids[3] == codec_id ||
46 parser->codec_ids[4] == codec_id)
51 s = av_mallocz(sizeof(AVCodecParserContext));
55 s->priv_data = av_mallocz(parser->priv_data_size);
/* the init callback is optional */
60 if (parser->parser_init) {
61 ret = parser->parser_init(s);
/* presumably reached only when parser_init reports failure -- confirm */
63 av_free(s->priv_data);
/**
 * Feed one input packet to the parser.
 * Records pts, dts and the current stream offset in the next slot of a ring
 * of AV_PARSER_PTS_NB packet descriptors, calls s->parser->parser_parse(),
 * copies the previously remembered frame offset and timestamps into
 * s->frame_offset, s->pts and s->dts, then remembers the offset and
 * timestamps that belong to the next frame and advances s->cur_offset.
 * @param s            parser context
 * @param avctx        codec context, passed through to parser_parse
 * @param poutbuf      passed through to parser_parse
 * @param poutbuf_size passed through to parser_parse
 * @param buf          input data
 * @param buf_size     input size; 0 signals EOF (see the note below)
 * @param pts          timestamp stored in cur_frame_pts for this packet
 * @param dts          timestamp stored in cur_frame_dts for this packet
 * NOTE(review): the return statement and several conditions and braces are
 * not visible in this view -- confirm the return value and the exact guards
 * against the complete function.
 */
72 /* NOTE: buf_size == 0 is used to signal EOF so that the last frame
73 can be returned if necessary */
74 int av_parser_parse(AVCodecParserContext *s,
75 AVCodecContext *avctx,
76 uint8_t **poutbuf, int *poutbuf_size,
77 const uint8_t *buf, int buf_size,
78 int64_t pts, int64_t dts)
81 uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
84 /* padding is always necessary even if EOF, so we add it here */
85 memset(dummy_buf, 0, sizeof(dummy_buf));
88 /* add a new packet descriptor */
/* the index wraps by masking, which presumably relies on AV_PARSER_PTS_NB
   being a power of two -- confirm */
89 k = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
90 s->cur_frame_start_index = k;
91 s->cur_frame_offset[k] = s->cur_offset;
92 s->cur_frame_pts[k] = pts;
93 s->cur_frame_dts[k] = dts;
95 /* fill first PTS/DTS */
96 if (s->fetch_timestamp){
/* NOTE(review): the statements between the test above and this reset of the
   slot timestamps are not visible here */
100 s->cur_frame_pts[k] =
101 s->cur_frame_dts[k] = AV_NOPTS_VALUE;
105 /* WARNING: the returned index can be negative */
106 index = s->parser->parser_parse(s, avctx, poutbuf, poutbuf_size, buf, buf_size);
107 //av_log(NULL, AV_LOG_DEBUG, "parser: in:%lld, %lld, out:%lld, %lld, in:%d out:%d id:%d\n", pts, dts, s->last_pts, s->last_dts, buf_size, *poutbuf_size, avctx->codec_id);
108 /* update the file pointer */
110 /* fill the data for the current frame */
111 s->frame_offset = s->last_frame_offset;
112 s->pts = s->last_pts;
113 s->dts = s->last_dts;
115 /* offset of the next frame */
116 s->last_frame_offset = s->cur_offset + index;
117 /* find the packet in which the new frame starts. It
118 is tricky because of MPEG video start codes
119 which can begin in one packet and finish in
120 another packet. In the worst case, an MPEG
121 video start code could be in 4 different
123 k = s->cur_frame_start_index;
124 for(i = 0; i < AV_PARSER_PTS_NB; i++) {
125 if (s->last_frame_offset >= s->cur_frame_offset[k])
127 k = (k - 1) & (AV_PARSER_PTS_NB - 1);
130 s->last_pts = s->cur_frame_pts[k];
131 s->last_dts = s->cur_frame_dts[k];
133 /* some parsers tell us the packet size even before seeing the first byte of the next packet,
134 so the next pts/dts is in the next chunk */
135 if(index == buf_size){
136 s->fetch_timestamp=1;
141 s->cur_offset += index;
/*
 * av_parser_change: produce the output packet for buf, optionally with
 * avctx->extradata prepended.
 * When s is non-NULL, the parser has a split callback and either
 * CODEC_FLAG_GLOBAL_HEADER or CODEC_FLAG2_LOCAL_HEADER is set, split() is
 * called on the input. The output defaults to the input buffer itself. When
 * avctx->extradata is set, keyframe is non-zero and CODEC_FLAG2_LOCAL_HEADER
 * is set, a new buffer is allocated that holds extradata followed by buf.
 * NOTE(review): what is done with the value returned by split(), and the
 * return statements, are not visible in this view -- confirm.
 */
147 * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed
149 int av_parser_change(AVCodecParserContext *s,
150 AVCodecContext *avctx,
151 uint8_t **poutbuf, int *poutbuf_size,
152 const uint8_t *buf, int buf_size, int keyframe){
154 if(s && s->parser->split){
155 if((avctx->flags & CODEC_FLAG_GLOBAL_HEADER) || (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)){
156 int i= s->parser->split(avctx, buf, buf_size);
162 /* cast to avoid warning about discarding qualifiers */
163 *poutbuf= (uint8_t *) buf;
164 *poutbuf_size= buf_size;
165 if(avctx->extradata){
166 if( (keyframe && (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER))
167 /*||(s->pict_type != I_TYPE && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_NOKEY))*/
168 /*||(? && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_BEGIN)*/){
169 int size= buf_size + avctx->extradata_size;
/* NOTE(review): no NULL check on the av_malloc result is visible before the
   copies below -- confirm */
171 *poutbuf= av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
173 memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
/* copies buf_size plus FF_INPUT_BUFFER_PADDING_SIZE bytes, so this assumes
   the caller's buf has that many readable bytes after buf_size -- confirm */
174 memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
/**
 * Shut down a parser context: call the parser's optional parser_close
 * callback and free the private data.
 * @param s parser context; dereferenced without a NULL check
 * NOTE(review): any statements after the av_free below are not visible in
 * this view.
 */
182 void av_parser_close(AVCodecParserContext *s)
184 if (s->parser->parser_close)
185 s->parser->parser_close(s);
186 av_free(s->priv_data);
190 /*****************************************************/
192 //#define END_NOT_FOUND (-100)
/* Start code values compared against the result of find_start_code() in
   mpegvideo_extract_headers(). Slice start codes form the inclusive range
   SLICE_MIN_START_CODE..SLICE_MAX_START_CODE. */
194 #define PICTURE_START_CODE 0x00000100
195 #define SEQ_START_CODE 0x000001b3
196 #define EXT_START_CODE 0x000001b5
197 #define SLICE_MIN_START_CODE 0x00000101
198 #define SLICE_MAX_START_CODE 0x000001af
/* Private state used by the MPEG-1/2 and MPEG-4 video parsers; both parser
   descriptors use sizeof(ParseContext1) as their private data size.
   NOTE(review): only one member declaration is visible in this view. Other
   code in the file also reads the members pc, frame_rate, width, height, enc
   and first_picture -- confirm their declarations in the complete struct. */
200 typedef struct ParseContext1{
202 /* XXX/FIXME PC1 vs. PC */
/* non-zero when bit 3 of the second sequence extension byte is set */
205 int progressive_sequence;
208 /* XXX: suppress that, needed by MPEG4 */
/*
 * ff_combine_frame: accumulate input in pc->buffer until a frame is complete.
 * next is the offset of the frame end inside *buf, or END_NOT_FOUND. With
 * END_NOT_FOUND the whole input is appended to pc->buffer. Otherwise
 * pc->overread_index is set to pc->index + next and the first next bytes
 * (plus padding) are appended. A negative next means bytes of the following
 * frame were already consumed; those bytes are shifted into pc->state.
 * NOTE(review): the return statements, the updates of *buf and *buf_size,
 * and the guards around the debug printf calls and the final append are not
 * visible in this view -- confirm against the complete function.
 * NOTE(review): both av_fast_realloc results are stored straight into
 * pc->buffer and no NULL check is visible.
 */
214 * combines the (truncated) bitstream to a complete frame
215 * @returns -1 if no complete frame could be created
217 int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size)
221 printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
222 printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
226 /* copy overread bytes from last frame into buffer */
227 for(; pc->overread>0; pc->overread--){
228 pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
231 /* flush remaining if EOF */
232 if(!*buf_size && next == END_NOT_FOUND){
236 pc->last_index= pc->index;
238 /* copy into buffer and return */
239 if(next == END_NOT_FOUND){
240 pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
242 memcpy(&pc->buffer[pc->index], *buf, *buf_size);
243 pc->index += *buf_size;
/* remember where the bytes belonging to the next frame start */
248 pc->overread_index= pc->index + next;
250 /* append to buffer */
252 pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
/* the copy includes FF_INPUT_BUFFER_PADDING_SIZE bytes past the frame end */
254 memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
259 /* store overread bytes */
260 for(;next < 0; next++){
261 pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
267 printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
268 printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
/**
 * Scan bytes up to buf_end for a 0x000001 start code prefix.
 * A 24-bit shift register (state) holds the most recent bytes; once it
 * equals 0x000001, the next byte is shifted in as well.
 * @param pbuf_ptr in/out read position
 * @param buf_end  end of the readable data
 * NOTE(review): the initialisation of buf_ptr, the read of v, the update of
 * *pbuf_ptr and the return statements are not visible in this view. The
 * caller compares the result with the *_START_CODE constants, so the found
 * code is presumably returned -- confirm, including the not-found value.
 */
275 static int find_start_code(const uint8_t **pbuf_ptr, const uint8_t *buf_end)
277 const uint8_t *buf_ptr;
278 unsigned int state=0xFFFFFFFF, v;
282 while (buf_ptr < buf_end) {
284 if (state == 0x000001) {
285 state = ((state << 8) | v) & 0xffffff;
289 state = ((state << 8) | v) & 0xffffff;
297 /* XXX: merge with libavcodec ? */
/* Frame rates are stored as integers scaled by MPEG1_FRAME_RATE_BASE:
   mpegvideo_extract_headers() uses a table entry as time_base.den and
   MPEG1_FRAME_RATE_BASE as time_base.num. The table is indexed with the low
   four bits of the fourth sequence header byte, hence 16 entries.
   NOTE(review): the table values are not visible in this view. */
298 #define MPEG1_FRAME_RATE_BASE 1001
300 static const int frame_rate_tab[16] = {
312 // libmpeg3's "Unofficial economy rates": (10-13)
317 // random, just to avoid segfault !never encode these
322 //FIXME move into mpeg12.c
/**
 * Parse the MPEG-1/2 headers at the start of a frame and update the parser
 * and codec contexts.
 * Loops over start codes returned by find_start_code() and, depending on the
 * code, extracts the picture type, the picture size, frame rate and bit
 * rate, or the MPEG-2 extension fields. Writes s->pict_type, pc->width,
 * pc->height, pc->frame_rate, pc->progressive_sequence and several avctx
 * fields (dimensions, time_base, bit_rate, codec_id, sub_id, has_b_frames).
 * @param s        parser context; priv_data is used as a ParseContext1
 * @param avctx    codec context to update
 * @param buf      start of the frame data
 * @param buf_size number of bytes in buf
 * NOTE(review): the switch statements, several case labels, the
 * repeat-field branch bodies and the loop exit are not visible in this
 * view -- confirm them against the complete function.
 */
323 static void mpegvideo_extract_headers(AVCodecParserContext *s,
324 AVCodecContext *avctx,
325 const uint8_t *buf, int buf_size)
327 ParseContext1 *pc = s->priv_data;
328 const uint8_t *buf_end;
330 int frame_rate_index, ext_type, bytes_left;
331 int frame_rate_ext_n, frame_rate_ext_d;
332 int picture_structure, top_field_first, repeat_first_field, progressive_frame;
333 int horiz_size_ext, vert_size_ext, bit_rate_ext;
334 //FIXME replace the crap with get_bits()
336 buf_end = buf + buf_size;
337 while (buf < buf_end) {
338 start_code = find_start_code(&buf, buf_end);
/* every field read below is guarded by a bytes_left check */
339 bytes_left = buf_end - buf;
341 case PICTURE_START_CODE:
342 if (bytes_left >= 2) {
/* three-bit field taken from bits 5..3 of the second byte */
343 s->pict_type = (buf[1] >> 3) & 7;
/* NOTE(review): the case label for this branch is not visible; presumably
   case SEQ_START_CODE -- confirm */
347 if (bytes_left >= 7) {
/* width and height are 12 bits each, packed into the first three bytes */
348 pc->width = (buf[0] << 4) | (buf[1] >> 4);
349 pc->height = ((buf[1] & 0x0f) << 8) | buf[2];
350 avcodec_set_dimensions(avctx, pc->width, pc->height);
351 frame_rate_index = buf[3] & 0xf;
352 pc->frame_rate = avctx->time_base.den = frame_rate_tab[frame_rate_index];
353 avctx->time_base.num = MPEG1_FRAME_RATE_BASE;
/* 18-bit value spanning bytes 4..6, multiplied by 400 */
354 avctx->bit_rate = ((buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6))*400;
355 avctx->codec_id = CODEC_ID_MPEG1VIDEO;
/* NOTE(review): the case label for this branch is not visible; presumably
   case EXT_START_CODE -- confirm */
360 if (bytes_left >= 1) {
/* the high nibble of the first byte selects the extension type */
361 ext_type = (buf[0] >> 4);
363 case 0x1: /* sequence extension */
364 if (bytes_left >= 6) {
365 horiz_size_ext = ((buf[1] & 1) << 1) | (buf[2] >> 7);
366 vert_size_ext = (buf[2] >> 5) & 3;
367 bit_rate_ext = ((buf[2] & 0x1F)<<7) | (buf[3]>>1);
368 frame_rate_ext_n = (buf[5] >> 5) & 3;
369 frame_rate_ext_d = (buf[5] & 0x1f);
370 pc->progressive_sequence = buf[1] & (1 << 3);
/* has_b_frames is set when the top bit of byte 5 is clear */
371 avctx->has_b_frames= !(buf[5] >> 7);
/* the extension bits become bits 12 and up of the stored size and bits 18
   and up of the bit rate value */
373 pc->width |=(horiz_size_ext << 12);
374 pc->height |=( vert_size_ext << 12);
375 avctx->bit_rate += (bit_rate_ext << 18) * 400;
376 avcodec_set_dimensions(avctx, pc->width, pc->height);
/* scale the stored frame rate by (ext_n + 1) / (ext_d + 1) */
377 avctx->time_base.den = pc->frame_rate * (frame_rate_ext_n + 1);
378 avctx->time_base.num = MPEG1_FRAME_RATE_BASE * (frame_rate_ext_d + 1);
379 avctx->codec_id = CODEC_ID_MPEG2VIDEO;
380 avctx->sub_id = 2; /* forces MPEG2 */
383 case 0x8: /* picture coding extension */
384 if (bytes_left >= 5) {
385 picture_structure = buf[2]&3;
386 top_field_first = buf[3] & (1 << 7);
387 repeat_first_field = buf[3] & (1 << 1);
388 progressive_frame = buf[4] & (1 << 7);
390 /* check if we must repeat the frame */
/* NOTE(review): the bodies of the branches below are not visible here */
391 if (repeat_first_field) {
392 if (pc->progressive_sequence) {
397 } else if (progressive_frame) {
402 /* the packet only represents half a frame
403 XXX,FIXME maybe find a different solution */
404 if(picture_structure != 3)
414 /* we stop parsing when we encounter a slice. It ensures
415 that this function takes a negligible amount of time */
416 if (start_code >= SLICE_MIN_START_CODE &&
417 start_code <= SLICE_MAX_START_CODE)
/**
 * parser_parse callback of the MPEG-1/2 video parser.
 * Unless PARSER_FLAG_COMPLETE_FRAMES is set, the frame end is located with
 * ff_mpeg1_find_frame_end() and the input is accumulated with
 * ff_combine_frame(). The headers of the frame are then parsed with
 * mpegvideo_extract_headers() and the frame is handed out through poutbuf
 * and poutbuf_size.
 * @param s            parser context; priv_data is a ParseContext1
 * @param avctx        codec context updated by the header parsing
 * @param poutbuf      receives the frame data pointer
 * @param poutbuf_size receives the frame size
 * @param buf          input data
 * @param buf_size     input size
 * NOTE(review): the body of the COMPLETE_FRAMES branch, the handling of a
 * negative ff_combine_frame() result and the return statements are not
 * visible in this view -- confirm.
 */
425 static int mpegvideo_parse(AVCodecParserContext *s,
426 AVCodecContext *avctx,
427 uint8_t **poutbuf, int *poutbuf_size,
428 const uint8_t *buf, int buf_size)
430 ParseContext1 *pc1 = s->priv_data;
431 ParseContext *pc= &pc1->pc;
434 if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
437 next= ff_mpeg1_find_frame_end(pc, buf, buf_size);
/* buf and buf_size are passed by address so ff_combine_frame can update them */
439 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
446 /* we have a full frame : we just parse the first few MPEG headers
447 to have the full timing information. The time taken by this
448 function should be negligible for uncorrupted streams */
449 mpegvideo_extract_headers(s, avctx, buf, buf_size);
451 printf("pict_type=%d frame_rate=%0.3f repeat_pict=%d\n",
452 s->pict_type, (double)avctx->time_base.den / avctx->time_base.num, s->repeat_pict);
455 *poutbuf = (uint8_t *)buf;
456 *poutbuf_size = buf_size;
/**
 * split callback of the MPEG-1/2 video parser.
 * Shifts the input bytes through state and stops at the first value in the
 * range 0x100..0x1FF that is neither 0x1B3 nor 0x1B5 (the values of
 * SEQ_START_CODE and EXT_START_CODE).
 * @param avctx    codec context; not used in the visible lines
 * @param buf      input data
 * @param buf_size input size
 * NOTE(review): the declaration of state and the return statements are not
 * visible in this view -- confirm the returned offset and the not-found
 * value.
 */
460 static int mpegvideo_split(AVCodecContext *avctx,
461 const uint8_t *buf, int buf_size)
466 for(i=0; i<buf_size; i++){
467 state= (state<<8) | buf[i];
468 if(state != 0x1B3 && state != 0x1B5 && state < 0x200 && state >= 0x100)
/**
 * Close callback for parsers whose private data is used as a ParseContext.
 * @param s parser context
 * NOTE(review): the statements that follow the declaration below are not
 * visible in this view.
 */
474 void ff_parse_close(AVCodecParserContext *s)
476 ParseContext *pc = s->priv_data;
/**
 * Close callback for parsers whose private data is a ParseContext1: frees
 * the reassembly buffer of the embedded ParseContext.
 * @param s parser context
 * NOTE(review): further statements may exist that are not visible in this
 * view.
 */
481 static void parse1_close(AVCodecParserContext *s)
483 ParseContext1 *pc1 = s->priv_data;
485 av_free(pc1->pc.buffer);
489 /*************************/
492 /* XXX: make it use less memory */
/**
 * Decode the MPEG-4 picture header of a frame to obtain its dimensions and
 * picture type.
 * For the first picture, when avctx carries extradata, the extradata is
 * parsed first with ff_mpeg4_decode_picture_header(). The frame data is then
 * parsed the same way, the dimensions are copied to avctx and the picture
 * type to s1->pict_type.
 * @param s1       parser context; priv_data is a ParseContext1
 * @param avctx    codec context that receives the dimensions
 * @param buf      frame data
 * @param buf_size frame size in bytes
 * NOTE(review): the return statement and any checks of ret are not visible
 * in this view. As shown, the ret from the extradata pass is overwritten by
 * the second call -- confirm.
 */
493 static int av_mpeg4_decode_header(AVCodecParserContext *s1,
494 AVCodecContext *avctx,
495 const uint8_t *buf, int buf_size)
497 ParseContext1 *pc = s1->priv_data;
498 MpegEncContext *s = pc->enc;
499 GetBitContext gb1, *gb = &gb1;
503 s->current_picture_ptr = &s->current_picture;
505 if (avctx->extradata_size && pc->first_picture){
/* init_get_bits takes the size in bits */
506 init_get_bits(gb, avctx->extradata, avctx->extradata_size*8);
507 ret = ff_mpeg4_decode_picture_header(s, gb);
510 init_get_bits(gb, buf, 8 * buf_size);
511 ret = ff_mpeg4_decode_picture_header(s, gb);
513 avcodec_set_dimensions(avctx, s->width, s->height);
515 s1->pict_type= s->pict_type;
/* the extradata is only parsed once */
516 pc->first_picture = 0;
/**
 * parser_init callback of the MPEG-4 video parser: allocates the zeroed
 * MpegEncContext used for header decoding and marks the next picture as the
 * first one.
 * @param s parser context; priv_data is a ParseContext1
 * NOTE(review): the allocation check and the return statements are not
 * visible in this view -- confirm.
 */
520 static int mpeg4video_parse_init(AVCodecParserContext *s)
522 ParseContext1 *pc = s->priv_data;
524 pc->enc = av_mallocz(sizeof(MpegEncContext));
527 pc->first_picture = 1;
/**
 * parser_parse callback of the MPEG-4 video parser.
 * Unless PARSER_FLAG_COMPLETE_FRAMES is set, the frame end is located with
 * ff_mpeg4_find_frame_end() and the input is accumulated with
 * ff_combine_frame(). The header of the frame is then decoded with
 * av_mpeg4_decode_header() and the frame is handed out through poutbuf and
 * poutbuf_size.
 * @param s            parser context
 * @param avctx        codec context updated by the header decoding
 * @param poutbuf      receives the frame data pointer
 * @param poutbuf_size receives the frame size
 * @param buf          input data
 * @param buf_size     input size
 * NOTE(review): priv_data is used directly as a ParseContext although the
 * descriptor allocates a ParseContext1; this presumably relies on pc being
 * the first member of ParseContext1 -- confirm. The body of the
 * COMPLETE_FRAMES branch, the handling of a negative ff_combine_frame()
 * result and the return statements are not visible in this view.
 */
531 static int mpeg4video_parse(AVCodecParserContext *s,
532 AVCodecContext *avctx,
533 uint8_t **poutbuf, int *poutbuf_size,
534 const uint8_t *buf, int buf_size)
536 ParseContext *pc = s->priv_data;
539 if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
542 next= ff_mpeg4_find_frame_end(pc, buf, buf_size);
544 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
/* the return value of the header decoding is ignored */
550 av_mpeg4_decode_header(s, avctx, buf, buf_size);
552 *poutbuf = (uint8_t *)buf;
553 *poutbuf_size = buf_size;
/**
 * split callback of the MPEG-4 video parser.
 * Shifts the input bytes through state and stops at the first position
 * where state equals 0x1B3 or 0x1B6.
 * @param avctx    codec context; not used in the visible lines
 * @param buf      input data
 * @param buf_size input size
 * NOTE(review): the declaration of state and the return statements are not
 * visible in this view -- confirm the returned offset and the not-found
 * value.
 */
557 static int mpeg4video_split(AVCodecContext *avctx,
558 const uint8_t *buf, int buf_size)
563 for(i=0; i<buf_size; i++){
564 state= (state<<8) | buf[i];
565 if(state == 0x1B3 || state == 0x1B6)
571 /*************************/
/* Private state of the MPEG audio parser.
   NOTE(review): mpegaudio_parse() also uses the members inbuf_ptr,
   frame_size, header and header_count, whose declarations are not visible in
   this view. */
573 typedef struct MpegAudioParseContext {
574 uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
/* frame size found by the free-format search; reset to 0 on resync */
577 int free_format_frame_size;
/* header found by the free-format search, stored for the next frame;
   0 means none is pending */
578 int free_format_next_header;
581 } MpegAudioParseContext;
/* Number of bytes that mpegaudio_parse() collects before it decodes a
   header. */
583 #define MPA_HEADER_SIZE 4
585 /* header + layer + bitrate + freq + lsf/mpeg25 */
/* Bits that must match between two headers for them to be treated as
   belonging to the same stream: bits 31..21, 20..19, 18..17 and 11..10.
   NOTE(review): the mask has no bits set in positions 15..12, so the
   "bitrate" named in the comment above is presumably not compared here --
   confirm whether that is intentional. */
586 #undef SAME_HEADER_MASK /* mpegaudio.h defines different version */
587 #define SAME_HEADER_MASK \
588 (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
/**
 * parser_init callback of the MPEG audio parser: points the write position
 * at the start of the input buffer.
 * @param s1 parser context; priv_data is a MpegAudioParseContext
 * NOTE(review): the return statement is not visible in this view.
 */
590 static int mpegaudio_parse_init(AVCodecParserContext *s1)
592 MpegAudioParseContext *s = s1->priv_data;
593 s->inbuf_ptr = s->inbuf;
/**
 * parser_parse callback of the MPEG audio parser.
 * Copies input into s->inbuf and works as a small state machine keyed on
 * s->frame_size:
 *   0  - no header yet: collect MPA_HEADER_SIZE bytes, validate them with
 *        mpa_decode_header() and slide by one byte when no sync is found;
 *   -1 - free format: search the buffered data for the next header matching
 *        the first one under SAME_HEADER_MASK to derive the frame size;
 *   >0 - copy until frame_size bytes are buffered, then emit the frame.
 * @param s1           parser context; priv_data is a MpegAudioParseContext
 * @param avctx        codec context; sample_rate is saved and may be restored
 * @param poutbuf      output frame pointer
 * @param poutbuf_size set to the number of buffered bytes when a frame is
 *                     emitted
 * @param buf          input data
 * @param buf_size     input size
 * @return number of input bytes consumed (buf_ptr - buf)
 * NOTE(review): many lines (pointer advances, branch bodies, the assignment
 * of *poutbuf, loop exits) are not visible in this view -- confirm the
 * details against the complete function.
 */
597 static int mpegaudio_parse(AVCodecParserContext *s1,
598 AVCodecContext *avctx,
599 uint8_t **poutbuf, int *poutbuf_size,
600 const uint8_t *buf, int buf_size)
602 MpegAudioParseContext *s = s1->priv_data;
605 const uint8_t *buf_ptr;
610 while (buf_size > 0) {
/* number of bytes currently buffered */
611 len = s->inbuf_ptr - s->inbuf;
612 if (s->frame_size == 0) {
613 /* special case for next header for first frame in free
614 format case (XXX: find a simpler method) */
615 if (s->free_format_next_header != 0) {
/* write the remembered header back in big-endian byte order */
616 s->inbuf[0] = s->free_format_next_header >> 24;
617 s->inbuf[1] = s->free_format_next_header >> 16;
618 s->inbuf[2] = s->free_format_next_header >> 8;
619 s->inbuf[3] = s->free_format_next_header;
620 s->inbuf_ptr = s->inbuf + 4;
621 s->free_format_next_header = 0;
624 /* no header seen : find one. We need at least MPA_HEADER_SIZE
626 len = MPA_HEADER_SIZE - len;
630 memcpy(s->inbuf_ptr, buf_ptr, len);
635 if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
637 sr= avctx->sample_rate;
638 header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
639 (s->inbuf[2] << 8) | s->inbuf[3];
641 ret = mpa_decode_header(avctx, header);
644 /* no sync found : move by one byte (inefficient, but simple!) */
645 memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
647 dprintf("skip %x\n", header);
648 /* reset free format frame size to give a chance
649 to get a new bitrate */
650 s->free_format_frame_size = 0;
/* compare with the previously stored header, if any; the branch body is not
   visible here */
652 if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
659 /* free format: prepare to compute frame size */
660 if (decode_header(s, header) == 1) {
/* put back the sample rate saved before mpa_decode_header() was called */
665 if(s->header_count <= 0)
666 avctx->sample_rate= sr; //FIXME ugly
670 if (s->frame_size == -1) {
671 /* free format : find next sync to compute frame size */
672 len = MPA_MAX_CODED_FRAME_SIZE - len;
676 /* frame too long: resync */
678 memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
685 memcpy(s->inbuf_ptr, buf_ptr, len);
686 /* check for header */
/* start three bytes back so that a header split across two calls is seen */
687 p = s->inbuf_ptr - 3;
688 pend = s->inbuf_ptr + len - 4;
690 header = (p[0] << 24) | (p[1] << 16) |
692 header1 = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
693 (s->inbuf[2] << 8) | s->inbuf[3];
694 /* check with high probability that we have a
696 if ((header & SAME_HEADER_MASK) ==
697 (header1 & SAME_HEADER_MASK)) {
698 /* header found: update pointers */
699 len = (p + 4) - s->inbuf_ptr;
703 /* compute frame size */
704 s->free_format_next_header = header;
705 s->free_format_frame_size = s->inbuf_ptr - s->inbuf;
/* bit 9 of the first header is used as the padding flag */
706 padding = (header1 >> 9) & 1;
708 s->free_format_frame_size -= padding * 4;
710 s->free_format_frame_size -= padding;
711 dprintf("free frame size=%d padding=%d\n",
712 s->free_format_frame_size, padding);
713 decode_header(s, header1);
718 /* not found: simply increase pointers */
/* frame size known: keep copying until frame_size bytes are buffered, with
   frame_size clamped to the capacity of inbuf */
725 if (len < s->frame_size) {
726 if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
727 s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
728 len = s->frame_size - len;
731 memcpy(s->inbuf_ptr, buf_ptr, len);
/* a complete frame is buffered */
737 if (s->frame_size > 0 &&
738 (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
739 if(s->header_count > 0){
741 *poutbuf_size = s->inbuf_ptr - s->inbuf;
743 s->inbuf_ptr = s->inbuf;
748 return buf_ptr - buf;
/* Prototypes of two AC-3 sync-info routines defined outside this file; both
   report flags, sample rate and bit rate through pointer arguments.
   ac3_parse() calls them on a buffered header and uses the result as the
   frame size.
   NOTE(review): presumably only one of the two is compiled in, selected by a
   preprocessor conditional that is not visible in this view -- confirm. */
753 extern int ff_a52_syncinfo (AVCodecContext * avctx, const uint8_t * buf,
754 int * flags, int * sample_rate, int * bit_rate);
756 extern int a52_syncinfo (const uint8_t * buf, int * flags,
757 int * sample_rate, int * bit_rate);
/* Private state of the AC-3 parser.
   NOTE(review): ac3_parse() also uses the members inbuf_ptr, frame_size and
   flags, whose declarations are not visible in this view. */
760 typedef struct AC3ParseContext {
761 uint8_t inbuf[4096]; /* input buffer */
/* Number of bytes that ac3_parse() buffers before it calls the sync-info
   routine. */
767 #define AC3_HEADER_SIZE 7
/**
 * parser_init callback of the AC-3 parser: points the write position at the
 * start of the input buffer.
 * @param s1 parser context; priv_data is an AC3ParseContext
 * NOTE(review): the return statement is not visible in this view.
 */
770 static int ac3_parse_init(AVCodecParserContext *s1)
772 AC3ParseContext *s = s1->priv_data;
773 s->inbuf_ptr = s->inbuf;
/**
 * parser_parse callback of the AC-3 parser.
 * While s->frame_size is 0, AC3_HEADER_SIZE bytes are collected in s->inbuf
 * and checked with the sync-info routine; when no sync is found the buffer
 * slides by one byte. On success the codec context is updated (sample rate,
 * channels, bit rate, frame size). Input is then copied until frame_size
 * bytes are buffered and the frame is emitted.
 * @param s1           parser context; priv_data is an AC3ParseContext
 * @param avctx        codec context updated from the header
 * @param poutbuf      output frame pointer
 * @param poutbuf_size set to s->frame_size when a frame is emitted
 * @param buf          input data
 * @param buf_size     input size
 * @return number of input bytes consumed (buf_ptr - buf)
 * NOTE(review): pointer advances, the assignments of s->frame_size and
 * *poutbuf, the LFE branch body and the loop exits are not visible in this
 * view -- confirm the details against the complete function.
 */
777 static int ac3_parse(AVCodecParserContext *s1,
778 AVCodecContext *avctx,
779 uint8_t **poutbuf, int *poutbuf_size,
780 const uint8_t *buf, int buf_size)
782 AC3ParseContext *s = s1->priv_data;
783 const uint8_t *buf_ptr;
784 int len, sample_rate, bit_rate;
/* channel count, indexed by the low three bits of s->flags */
785 static const int ac3_channels[8] = {
786 2, 1, 2, 3, 3, 4, 4, 5
793 while (buf_size > 0) {
/* number of bytes currently buffered */
794 len = s->inbuf_ptr - s->inbuf;
795 if (s->frame_size == 0) {
796 /* no header seen : find one. We need at least 7 bytes to parse it */
797 len = AC3_HEADER_SIZE - len;
800 memcpy(s->inbuf_ptr, buf_ptr, len);
804 if ((s->inbuf_ptr - s->inbuf) == AC3_HEADER_SIZE) {
/* NOTE(review): the two calls below are presumably alternatives selected by
   a preprocessor conditional that is not visible here -- confirm */
806 len = ff_a52_syncinfo(avctx, s->inbuf, &s->flags, &sample_rate, &bit_rate);
808 len = a52_syncinfo(s->inbuf, &s->flags, &sample_rate, &bit_rate);
811 /* no sync found : move by one byte (inefficient, but simple!) */
812 memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
816 /* update codec info */
817 avctx->sample_rate = sample_rate;
818 /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
819 if(avctx->channels!=1 && avctx->channels!=2){
820 avctx->channels = ac3_channels[s->flags & 7];
821 if (s->flags & A52_LFE)
824 avctx->bit_rate = bit_rate;
825 avctx->frame_size = 6 * 256;
/* header accepted: keep copying until the whole frame is buffered */
828 } else if (len < s->frame_size) {
829 len = s->frame_size - len;
833 memcpy(s->inbuf_ptr, buf_ptr, len);
839 *poutbuf_size = s->frame_size;
/* rewind the write position for the next frame */
840 s->inbuf_ptr = s->inbuf;
845 return buf_ptr - buf;
/* Parser descriptor for MPEG-1 and MPEG-2 video; the private data is a
   ParseContext1.
   NOTE(review): the callback entries of this initializer are not visible in
   this view. */
849 AVCodecParser mpegvideo_parser = {
850 { CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO },
851 sizeof(ParseContext1),
/* Parser descriptor for MPEG-4 video; the private data is a ParseContext1
   and mpeg4video_parse_init is listed after the size entry.
   NOTE(review): the codec id list and the remaining callback entries are not
   visible in this view. */
858 AVCodecParser mpeg4video_parser = {
860 sizeof(ParseContext1),
861 mpeg4video_parse_init,
/* Parser descriptor for MP2 and MP3 audio; the private data is a
   MpegAudioParseContext and mpegaudio_parse_init is listed after the size
   entry.
   NOTE(review): the remaining callback entries are not visible in this
   view. */
867 AVCodecParser mpegaudio_parser = {
868 { CODEC_ID_MP2, CODEC_ID_MP3 },
869 sizeof(MpegAudioParseContext),
870 mpegaudio_parse_init,
/* Parser descriptor for AC-3 audio; the private data is an AC3ParseContext.
   NOTE(review): the codec id list and the callback entries are not visible
   in this view. */
876 AVCodecParser ac3_parser = {
878 sizeof(AC3ParseContext),