2 * Audio and Video frame extraction
3 * Copyright (c) 2003 Fabrice Bellard.
4 * Copyright (c) 2003 Michael Niedermayer.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "mpegvideo.h"
22 #include "mpegaudio.h"
// Head of the singly linked list of registered parsers. av_register_codec_parser()
// pushes new entries at the front; av_parser_init() walks it via ->next.
24 AVCodecParser *av_first_parser = NULL;
// Registers a parser by pushing it onto the front of the global av_first_parser list.
// parser->next is overwritten, so the most recently registered parser is visited first
// by any walk that starts at av_first_parser.
26 void av_register_codec_parser(AVCodecParser *parser)
28 parser->next = av_first_parser;
29 av_first_parser = parser;
// Finds a registered parser that lists codec_id and builds a parser context for it.
// NOTE(review): this view is missing several original lines of the function (braces,
// the handling of a match / no match, any allocation checks, and the return), so the
// comments below describe only the statements that are visible.
32 AVCodecParserContext *av_parser_init(int codec_id)
34 AVCodecParserContext *s;
35 AVCodecParser *parser;
// Scan the global parser list; only the first three codec_ids slots are compared.
38 for(parser = av_first_parser; parser != NULL; parser = parser->next) {
39 if (parser->codec_ids[0] == codec_id ||
40 parser->codec_ids[1] == codec_id ||
41 parser->codec_ids[2] == codec_id)
// Allocate the context, then the parser-specific private area whose size is
// supplied by the parser itself.
46 s = av_mallocz(sizeof(AVCodecParserContext));
50 s->priv_data = av_mallocz(parser->priv_data_size);
// The init hook is optional; its result is captured in ret.
55 if (parser->parser_init) {
56 ret = parser->parser_init(s);
// NOTE(review): presumably the cleanup for a failed parser_init -- the condition that
// guards this free is on a line not shown here; confirm.
58 av_free(s->priv_data);
66 /* NOTE: buf_size == 0 is used to signal EOF so that the last frame
67 can be returned if necessary */
// Feeds one input packet (buf/buf_size, stamped with pts/dts) to the codec-specific
// parser and maintains the bookkeeping that maps output frames back to the timestamps
// of the packet in which they started.
// NOTE(review): local declarations, some branch bodies and the return statement are on
// lines missing from this view.
68 int av_parser_parse(AVCodecParserContext *s,
69 AVCodecContext *avctx,
70 uint8_t **poutbuf, int *poutbuf_size,
71 const uint8_t *buf, int buf_size,
72 int64_t pts, int64_t dts)
75 uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
78 /* padding is always necessary even if EOF, so we add it here */
79 memset(dummy_buf, 0, sizeof(dummy_buf));
// NOTE(review): how dummy_buf is substituted for buf is not visible here; presumably it
// replaces buf on the buf_size == 0 (EOF) path -- confirm.
82 /* add a new packet descriptor */
// Advance the ring index. Masking with AV_PARSER_PTS_NB - 1 only wraps correctly if
// AV_PARSER_PTS_NB is a power of two (assumed -- confirm against its definition).
83 k = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
84 s->cur_frame_start_index = k;
85 s->cur_frame_offset[k] = s->cur_offset;
86 s->cur_frame_pts[k] = pts;
87 s->cur_frame_dts[k] = dts;
89 /* fill first PTS/DTS */
90 if (s->cur_offset == 0) {
96 /* WARNING: the returned index can be negative */
97 index = s->parser->parser_parse(s, avctx, poutbuf, poutbuf_size, buf, buf_size);
98 /* update the file pointer */
100 /* fill the data for the current frame */
101 s->frame_offset = s->last_frame_offset;
102 s->pts = s->last_pts;
103 s->dts = s->last_dts;
105 /* offset of the next frame */
106 s->last_frame_offset = s->cur_offset + index;
// Starting from the newest descriptor, step backwards through at most AV_PARSER_PTS_NB
// slots, testing whether the next frame's offset is at or beyond the slot's start
// offset (the action taken on a match is not visible). The pts/dts stored in the slot
// k ends on become last_pts/last_dts.
107 /* find the packet in which the new frame starts. It
108 is tricky because of MPEG video start codes
109 which can begin in one packet and finish in
110 another packet. In the worst case, an MPEG
111 video start code could be in 4 different
113 k = s->cur_frame_start_index;
114 for(i = 0; i < AV_PARSER_PTS_NB; i++) {
115 if (s->last_frame_offset >= s->cur_frame_offset[k])
117 k = (k - 1) & (AV_PARSER_PTS_NB - 1);
119 s->last_pts = s->cur_frame_pts[k];
120 s->last_dts = s->cur_frame_dts[k];
// Advance the running stream offset by what the parser reported (may be negative).
124 s->cur_offset += index;
// Tears down a parser context: runs the optional parser_close hook, then frees the
// parser-private data.
// NOTE(review): whether the context `s` itself is freed is not visible in this view
// (the lines after the priv_data free are missing) -- confirm.
128 void av_parser_close(AVCodecParserContext *s)
130 if (s->parser->parser_close)
131 s->parser->parser_close(s);
132 av_free(s->priv_data);
136 /*****************************************************/
// END_NOT_FOUND is used by the frame-end finders below, but only this commented-out
// definition is visible in this view; the live definition is presumably elsewhere.
138 //#define END_NOT_FOUND (-100)
// MPEG-1/2 video start codes in the form they take in the scanning state
// (00 00 01 prefix followed by the code byte). Slice codes form the inclusive range
// SLICE_MIN_START_CODE..SLICE_MAX_START_CODE.
140 #define PICTURE_START_CODE 0x00000100
141 #define SEQ_START_CODE 0x000001b3
142 #define EXT_START_CODE 0x000001b5
143 #define SLICE_MIN_START_CODE 0x00000101
144 #define SLICE_MAX_START_CODE 0x000001af
// Private per-stream state used by the MPEG-1/2, MPEG-4, H.263 and H.264 parsers in
// this file (each of them reads it from priv_data).
// NOTE(review): members referenced by the code (buffer, index, last_index, buffer_size,
// width, height, frame_rate, enc, first_picture) are declared on lines missing from
// this view, as is the closing of the typedef.
146 typedef struct ParseContext1{
151 uint32_t state; ///< contains the last few bytes in MSB order
152 int frame_start_found;
153 int overread; ///< the number of bytes which were irreversibly read from the next frame
154 int overread_index; ///< the index into ParseContext1.buffer of the overread bytes
158 int progressive_sequence;
161 /* XXX: suppress that, needed by MPEG4 */
167 * combines the (truncated) bitstream to a complete frame
168 * @returns -1 if no complete frame could be created
// Accumulates input in pc->buffer until a frame boundary is known.
//   next == END_NOT_FOUND : the whole input is appended to pc->buffer.
//   otherwise             : `next` is the boundary relative to *buf; it may be negative,
//                           in which case bytes already stored in pc->buffer are
//                           shifted into pc->state.
// NOTE(review): the return statements and any writes to *buf / *buf_size are on lines
// missing from this view.
170 static int ff_combine_frame1(ParseContext1 *pc, int next, uint8_t **buf, int *buf_size)
// NOTE(review): presumably debug-only output (any guard around it is not visible).
// It reads (*buf)[0..3] without consulting *buf_size.
174 printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
175 printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
179 /* copy overread bytes from last frame into buffer */
180 for(; pc->overread>0; pc->overread--){
181 pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
184 pc->last_index= pc->index;
186 /* copy into buffer and return */
187 if(next == END_NOT_FOUND){
// NOTE(review): the av_fast_realloc() result overwrites pc->buffer directly and no NULL
// check is visible before the memcpy below -- confirm how allocation failure is handled.
188 pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
190 memcpy(&pc->buffer[pc->index], *buf, *buf_size);
191 pc->index += *buf_size;
196 pc->overread_index= pc->index + next;
198 /* append to buffer */
200 pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
// Copies `next` payload bytes plus FF_INPUT_BUFFER_PADDING_SIZE more from *buf, so the
// caller's buffer must be readable that far past `next`.
202 memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
207 /* store overread bytes */
// For a negative `next`, shift the bytes pc->buffer[last_index + next .. last_index - 1]
// into pc->state, oldest byte first.
208 for(;next < 0; next++){
209 pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
215 printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
216 printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
224 * finds the end of the current frame in the bitstream.
225 * @return the position of the first byte of the next frame, or -1
// Two-phase scan over buf using a rolling start-code window in `state`:
//   1. until a slice start code has been seen, look for one and set
//      pc->frame_start_found;
//   2. afterwards, the first start code (00 00 01 xx) that is NOT a slice clears
//      pc->frame_start_found (this is the frame end).
// NOTE(review): the visible fall-through returns END_NOT_FOUND rather than a literal -1
// as the line above says. The declarations/initialisation of i and state, and the
// value returned when an end is found, are on lines missing from this view.
227 static int mpeg1_find_frame_end(ParseContext1 *pc, const uint8_t *buf, int buf_size)
235 if(!pc->frame_start_found){
236 for(i=0; i<buf_size; i++){
237 state= (state<<8) | buf[i];
238 if(state >= SLICE_MIN_START_CODE && state <= SLICE_MAX_START_CODE){
240 pc->frame_start_found=1;
246 if(pc->frame_start_found){
247 /* EOF considered as end of frame */
// This loop has no initialiser: it continues from the current value of i.
250 for(; i<buf_size; i++){
251 state= (state<<8) | buf[i];
252 if((state&0xFFFFFF00) == 0x100){
253 if(state < SLICE_MIN_START_CODE || state > SLICE_MAX_START_CODE){
254 pc->frame_start_found=0;
262 return END_NOT_FOUND;
// Scans for a 00 00 01 start-code prefix below buf_end, keeping the most recent bytes
// in `state` (masked to 24 bits on every update).
// NOTE(review): the lines that initialise buf_ptr, read `v`, build the returned code
// and write back *pbuf_ptr are missing from this view. The caller
// mpegvideo_extract_headers() switches on the return value as a full start code and
// recomputes bytes_left from the updated pointer.
265 static int find_start_code(const uint8_t **pbuf_ptr, const uint8_t *buf_end)
267 const uint8_t *buf_ptr;
268 unsigned int state=0xFFFFFFFF, v;
272 while (buf_ptr < buf_end) {
274 if (state == 0x000001) {
275 state = ((state << 8) | v) & 0xffffff;
279 state = ((state << 8) | v) & 0xffffff;
287 /* XXX: merge with libavcodec ? */
// Stored into avctx->frame_rate_base alongside a frame_rate_tab entry in
// avctx->frame_rate by mpegvideo_extract_headers().
288 #define MPEG1_FRAME_RATE_BASE 1001
// Indexed by the 4-bit frame_rate_index read from the header (buf[3] & 0xf), which is
// why it has 16 entries. The entry values are on lines missing from this view.
290 static const int frame_rate_tab[16] = {
302 // libmpeg3's "Unofficial economy rates": (10-13)
307 // random, just to avoid segfault !never encode these
// Walks the start codes at the beginning of a frame and copies stream parameters into
// avctx / s / pc:
//   - picture header: picture type;
//   - a header carrying width, height and frame-rate index (presumably the sequence
//     header), which also sets codec_id to MPEG-1;
//   - extension data: sequence extension (size and frame-rate extensions, sets
//     codec_id to MPEG-2) and picture coding extension (field/repeat flags).
// The trailing check tests for a slice start code, where parsing stops.
// NOTE(review): the switch statements, several case labels, the declaration of
// start_code and the bodies of the repeat-field branches are on lines missing from
// this view.
312 static void mpegvideo_extract_headers(AVCodecParserContext *s,
313 AVCodecContext *avctx,
314 const uint8_t *buf, int buf_size)
316 ParseContext1 *pc = s->priv_data;
317 const uint8_t *buf_end;
319 int frame_rate_index, ext_type, bytes_left;
320 int frame_rate_ext_n, frame_rate_ext_d;
321 int top_field_first, repeat_first_field, progressive_frame;
322 int horiz_size_ext, vert_size_ext;
325 buf_end = buf + buf_size;
326 while (buf < buf_end) {
// find_start_code() is handed &buf, so bytes_left is measured from wherever it left
// the pointer.
327 start_code = find_start_code(&buf, buf_end);
328 bytes_left = buf_end - buf;
330 case PICTURE_START_CODE:
331 if (bytes_left >= 2) {
// 3-bit field taken from bits 5..3 of buf[1].
332 s->pict_type = (buf[1] >> 3) & 7;
// Presumably the SEQ_START_CODE case (its label is not visible): 12-bit width, 12-bit
// height, then the frame-rate index in the low nibble of buf[3].
336 if (bytes_left >= 4) {
337 pc->width = avctx->width = (buf[0] << 4) | (buf[1] >> 4);
338 pc->height = avctx->height = ((buf[1] & 0x0f) << 8) | buf[2];
339 frame_rate_index = buf[3] & 0xf;
340 pc->frame_rate = avctx->frame_rate = frame_rate_tab[frame_rate_index];
341 avctx->frame_rate_base = MPEG1_FRAME_RATE_BASE;
342 avctx->codec_id = CODEC_ID_MPEG1VIDEO;
// Presumably the EXT_START_CODE case (label not visible): the extension type is the
// high nibble of the first byte.
347 if (bytes_left >= 1) {
348 ext_type = (buf[0] >> 4);
350 case 0x1: /* sequence extension */
351 if (bytes_left >= 6) {
352 horiz_size_ext = ((buf[1] & 1) << 1) | (buf[2] >> 7);
353 vert_size_ext = (buf[2] >> 5) & 3;
354 frame_rate_ext_n = (buf[5] >> 5) & 3;
355 frame_rate_ext_d = (buf[5] & 0x1f);
// Stored as the raw masked bit (0 or 8), not normalised to 0/1; only its truth value
// is tested further down.
356 pc->progressive_sequence = buf[1] & (1 << 3);
// The 2-bit size extensions are placed at bits 12-13 on top of the 12-bit base sizes
// saved in pc; the base frame rate is scaled by (n + 1) and the base by (d + 1).
358 avctx->width = pc->width | (horiz_size_ext << 12);
359 avctx->height = pc->height | (vert_size_ext << 12);
360 avctx->frame_rate = pc->frame_rate * (frame_rate_ext_n + 1);
361 avctx->frame_rate_base = MPEG1_FRAME_RATE_BASE * (frame_rate_ext_d + 1);
362 avctx->codec_id = CODEC_ID_MPEG2VIDEO;
363 avctx->sub_id = 2; /* forces MPEG2 */
366 case 0x8: /* picture coding extension */
367 if (bytes_left >= 5) {
// These three hold raw masked bits (zero / non-zero), not 0/1.
368 top_field_first = buf[3] & (1 << 7);
369 repeat_first_field = buf[3] & (1 << 1);
370 progressive_frame = buf[4] & (1 << 7);
372 /* check if we must repeat the frame */
373 if (repeat_first_field) {
374 if (pc->progressive_sequence) {
379 } else if (progressive_frame) {
391 /* we stop parsing when we encounter a slice. It ensures
392 that this function takes a negligible amount of time */
393 if (start_code >= SLICE_MIN_START_CODE &&
394 start_code <= SLICE_MAX_START_CODE)
// Parser callback for MPEG-1/2 video: locate the frame end in this packet, let
// ff_combine_frame1() assemble the frame and, when it does not report failure (< 0),
// extract the headers and hand the frame out through poutbuf / poutbuf_size.
// NOTE(review): the body of the "< 0" branch, the declaration of `next` and the return
// values are on lines missing from this view.
402 static int mpegvideo_parse(AVCodecParserContext *s,
403 AVCodecContext *avctx,
404 uint8_t **poutbuf, int *poutbuf_size,
405 const uint8_t *buf, int buf_size)
407 ParseContext1 *pc = s->priv_data;
410 next= mpeg1_find_frame_end(pc, buf, buf_size);
// The cast drops const so that ff_combine_frame1() can take buf by address.
412 if (ff_combine_frame1(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
417 /* we have a full frame : we just parse the first few MPEG headers
418 to have the full timing information. The time taken by this
419 function should be negligible for uncorrupted streams */
420 mpegvideo_extract_headers(s, avctx, buf, buf_size);
// NOTE(review): presumably debug-only output; any guard around it is not visible.
422 printf("pict_type=%d frame_rate=%0.3f repeat_pict=%d\n",
423 s->pict_type, (double)avctx->frame_rate / avctx->frame_rate_base, s->repeat_pict);
426 *poutbuf = (uint8_t *)buf;
427 *poutbuf_size = buf_size;
// Close callback; it also appears in the MPEG-4, H.263 and H.264 parser tables.
// NOTE(review): only the context lookup is visible -- whatever is released through
// `pc` is on lines missing from this view.
431 static void mpegvideo_parse_close(AVCodecParserContext *s)
433 ParseContext1 *pc = s->priv_data;
439 /*************************/
442 * finds the end of the current frame in the bitstream.
443 * @return the position of the first byte of the next frame, or -1
// Scans buf with a rolling start-code window in `state`, starting from the
// frame_start_found flag saved in pc. The first loop's test is not visible here; in
// the second loop any start code (00 00 01 xx) clears pc->frame_start_found.
// On the fall-through path the local vop_found flag is saved back into pc and
// END_NOT_FOUND is returned.
// NOTE(review): the visible fall-through returns END_NOT_FOUND rather than a literal -1;
// declarations, loop guards and the "found" return are on lines missing from this view.
445 static int mpeg4_find_frame_end(ParseContext1 *pc,
446 const uint8_t *buf, int buf_size)
451 vop_found= pc->frame_start_found;
456 for(i=0; i<buf_size; i++){
457 state= (state<<8) | buf[i];
467 /* EOF considered as end of frame */
// No initialiser: continues from the current value of i.
470 for(; i<buf_size; i++){
471 state= (state<<8) | buf[i];
472 if((state&0xFFFFFF00) == 0x100){
473 pc->frame_start_found=0;
479 pc->frame_start_found= vop_found;
481 return END_NOT_FOUND;
485 /* XXX: make it use less memory */
// Runs ff_mpeg4_decode_picture_header() on a frame to learn the coded dimensions.
// While pc->first_picture is set and extradata is present, the extradata is decoded
// as well; width/height are then copied from the MpegEncContext into avctx and
// first_picture is cleared.
// NOTE(review): the handling of `ret`, the braces around the extradata branch and the
// return statement are on lines missing from this view.
486 static int av_mpeg4_decode_header(AVCodecParserContext *s1,
487 AVCodecContext *avctx,
488 const uint8_t *buf, int buf_size)
490 ParseContext1 *pc = s1->priv_data;
491 MpegEncContext *s = pc->enc;
492 GetBitContext gb1, *gb = &gb1;
496 s->current_picture_ptr = &s->current_picture;
498 if (avctx->extradata_size && pc->first_picture){
// init_get_bits() is given its size argument as byte count * 8 in both calls.
499 init_get_bits(gb, avctx->extradata, avctx->extradata_size*8);
500 ret = ff_mpeg4_decode_picture_header(s, gb);
503 init_get_bits(gb, buf, 8 * buf_size);
504 ret = ff_mpeg4_decode_picture_header(s, gb);
506 avctx->width = s->width;
507 avctx->height = s->height;
509 pc->first_picture = 0;
// Init callback for the MPEG-4 parser: allocates the MpegEncContext that
// av_mpeg4_decode_header() reads through pc->enc and sets the first-picture flag.
// NOTE(review): unlike the other callbacks in this file this one is not declared
// static. Any check of the allocation result and the return value are on lines
// missing from this view.
513 int mpeg4video_parse_init(AVCodecParserContext *s)
515 ParseContext1 *pc = s->priv_data;
517 pc->enc = av_mallocz(sizeof(MpegEncContext));
520 pc->first_picture = 1;
// Parser callback for MPEG-4 video: find the frame end, let ff_combine_frame1()
// assemble the frame, decode its header for the dimensions, then expose the frame
// through poutbuf / poutbuf_size.
// NOTE(review): the body of the "< 0" branch and the return values are on lines
// missing from this view; the result of av_mpeg4_decode_header() is not used here.
524 static int mpeg4video_parse(AVCodecParserContext *s,
525 AVCodecContext *avctx,
526 uint8_t **poutbuf, int *poutbuf_size,
527 const uint8_t *buf, int buf_size)
529 ParseContext1 *pc = s->priv_data;
532 next= mpeg4_find_frame_end(pc, buf, buf_size);
534 if (ff_combine_frame1(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
539 av_mpeg4_decode_header(s, avctx, buf, buf_size);
541 *poutbuf = (uint8_t *)buf;
542 *poutbuf_size = buf_size;
546 /*************************/
// Frame-end finder for H.263. Both loops shift input bytes into `state` and test
// whether state >> 10 equals 0x20 (i.e. the top 22 bits, if state is 32 bits wide --
// its declaration is not visible). In the second loop a match clears
// pc->frame_start_found; on the fall-through path vop_found is saved back into pc and
// END_NOT_FOUND is returned.
// NOTE(review): declarations, the action on the first match, loop guards and the
// "found" return are on lines missing from this view.
548 static int h263_find_frame_end(ParseContext1 *pc, const uint8_t *buf, int buf_size)
553 vop_found= pc->frame_start_found;
558 for(i=0; i<buf_size; i++){
559 state= (state<<8) | buf[i];
560 if(state>>(32-22) == 0x20){
// No initialiser: continues from the current value of i.
569 for(; i<buf_size; i++){
570 state= (state<<8) | buf[i];
571 if(state>>(32-22) == 0x20){
572 pc->frame_start_found=0;
578 pc->frame_start_found= vop_found;
581 return END_NOT_FOUND;
// Parser callback for H.263: find the frame end, let ff_combine_frame1() assemble the
// frame, then expose it through poutbuf / poutbuf_size. avctx is not touched in the
// visible lines.
// NOTE(review): the body of the "< 0" branch and the return values are on lines
// missing from this view.
584 static int h263_parse(AVCodecParserContext *s,
585 AVCodecContext *avctx,
586 uint8_t **poutbuf, int *poutbuf_size,
587 const uint8_t *buf, int buf_size)
589 ParseContext1 *pc = s->priv_data;
592 next= h263_find_frame_end(pc, buf, buf_size);
594 if (ff_combine_frame1(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
600 *poutbuf = (uint8_t *)buf;
601 *poutbuf_size = buf_size;
605 /*************************/
608 * finds the end of the current frame in the bitstream.
609 * @return the position of the first byte of the next frame, or -1
// Frame-end finder for H.264. A match is a 00 00 01 prefix followed by a byte whose
// low five bits are 1, 2 or 5 (the mask 0xFFFFFF1F ignores that byte's top three
// bits). pc->frame_start_found toggles on matches: if it was already set it is
// cleared, and it is set to 1 on a match otherwise (the exact branch structure
// between those two assignments is not visible).
// NOTE(review): the visible fall-through returns END_NOT_FOUND rather than a literal -1;
// declarations and the "found" return are on lines missing from this view.
611 static int h264_find_frame_end(ParseContext1 *pc, const uint8_t *buf, int buf_size)
615 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
616 // mb_addr= pc->mb_addr - 1;
618 //FIXME this will fail with slices
619 for(i=0; i<buf_size; i++){
620 state= (state<<8) | buf[i];
621 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
622 if(pc->frame_start_found){
624 pc->frame_start_found= 0;
627 pc->frame_start_found= 1;
632 return END_NOT_FOUND;
// Parser callback for H.264: find the frame end, let ff_combine_frame1() assemble the
// frame, then expose it through poutbuf / poutbuf_size. avctx is not touched in the
// visible lines.
// NOTE(review): the body of the "< 0" branch and the return values are on lines
// missing from this view.
635 static int h264_parse(AVCodecParserContext *s,
636 AVCodecContext *avctx,
637 uint8_t **poutbuf, int *poutbuf_size,
638 const uint8_t *buf, int buf_size)
640 ParseContext1 *pc = s->priv_data;
643 next= h264_find_frame_end(pc, buf, buf_size);
645 if (ff_combine_frame1(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
651 *poutbuf = (uint8_t *)buf;
652 *poutbuf_size = buf_size;
656 /*************************/
// Private state of the MPEG audio parser: a fixed-size accumulation buffer plus the
// free-format bookkeeping used by mpegaudio_parse().
// NOTE(review): inbuf_ptr and frame_size, both used by mpegaudio_parse(), are declared
// on lines missing from this view.
658 typedef struct MpegAudioParseContext {
659 uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
// Frame size computed for a free-format stream; reset to 0 when sync is lost.
662 int free_format_frame_size;
// Header found while measuring a free-format frame; when non-zero it is replayed as
// the first four bytes of inbuf on the next header search.
663 int free_format_next_header;
664 } MpegAudioParseContext;
// Number of bytes mpegaudio_parse() gathers before it assembles a 32-bit header.
666 #define MPA_HEADER_SIZE 4
668 /* header + layer + bitrate + freq + lsf/mpeg25 */
// Going by the order of the comment above, the terms presumably select the sync bits
// (0xffe00000), layer (3 << 17), bitrate (0xf << 12), sample rate (3 << 10) and
// version bits (3 << 19) -- mapping inferred from the comment; confirm. Two headers
// that are equal under this mask are accepted as the same stream during the
// free-format search.
669 #define SAME_HEADER_MASK \
670 (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19))
// Init callback for the MPEG audio parser: points the write cursor at the start of
// the accumulation buffer.
// NOTE(review): the return statement is on a line missing from this view.
672 static int mpegaudio_parse_init(AVCodecParserContext *s1)
674 MpegAudioParseContext *s = s1->priv_data;
675 s->inbuf_ptr = s->inbuf;
// Parser callback for MPEG audio. Input bytes are accumulated in s->inbuf and
// s->frame_size drives a small state machine:
//    0 : no header yet -- gather MPA_HEADER_SIZE bytes and try to decode them;
//   -1 : free-format stream -- search for the next matching header to learn the size;
//   >0 : header accepted -- gather frame_size bytes, then emit the frame.
// Returns the number of input bytes consumed (buf_ptr - buf).
// NOTE(review): many statements (pointer/length updates, branch conditions, local
// declarations, the assignment of *poutbuf) are on lines missing from this view.
679 static int mpegaudio_parse(AVCodecParserContext *s1,
680 AVCodecContext *avctx,
681 uint8_t **poutbuf, int *poutbuf_size,
682 const uint8_t *buf, int buf_size)
684 MpegAudioParseContext *s = s1->priv_data;
687 const uint8_t *buf_ptr;
692 while (buf_size > 0) {
// len = number of bytes currently buffered.
693 len = s->inbuf_ptr - s->inbuf;
694 if (s->frame_size == 0) {
695 /* special case for next header for first frame in free
696 format case (XXX: find a simpler method) */
697 if (s->free_format_next_header != 0) {
698 s->inbuf[0] = s->free_format_next_header >> 24;
699 s->inbuf[1] = s->free_format_next_header >> 16;
700 s->inbuf[2] = s->free_format_next_header >> 8;
701 s->inbuf[3] = s->free_format_next_header;
702 s->inbuf_ptr = s->inbuf + 4;
703 s->free_format_next_header = 0;
706 /* no header seen : find one. We need at least MPA_HEADER_SIZE
708 len = MPA_HEADER_SIZE - len;
712 memcpy(s->inbuf_ptr, buf_ptr, len);
717 if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
// NOTE(review): inbuf[] is uint8_t, so inbuf[0] << 24 is evaluated as int; a first
// byte >= 0x80 shifts into the sign bit. Consider casting to uint32_t before shifting.
719 header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
720 (s->inbuf[2] << 8) | s->inbuf[3];
722 ret = mpa_decode_header(avctx, header);
724 /* no sync found : move by one byte (inefficient, but simple!) */
725 memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
727 dprintf("skip %x\n", header);
728 /* reset free format frame size to give a chance
729 to get a new bitrate */
730 s->free_format_frame_size = 0;
734 /* free format: prepare to compute frame size */
735 if (decode_header(s, header) == 1) {
743 if (s->frame_size == -1) {
744 /* free format : find next sync to compute frame size */
745 len = MPA_MAX_CODED_FRAME_SIZE - len;
749 /* frame too long: resync */
751 memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
758 memcpy(s->inbuf_ptr, buf_ptr, len);
759 /* check for header */
// The scan range starts 3 bytes before the newly copied data, presumably so that a
// header straddling the old/new boundary is still found.
760 p = s->inbuf_ptr - 3;
761 pend = s->inbuf_ptr + len - 4;
763 header = (p[0] << 24) | (p[1] << 16) |
// header1 is the header at the start of the buffered frame; the candidate `header`
// is compared against it under SAME_HEADER_MASK.
765 header1 = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
766 (s->inbuf[2] << 8) | s->inbuf[3];
767 /* check with high probability that we have a
769 if ((header & SAME_HEADER_MASK) ==
770 (header1 & SAME_HEADER_MASK)) {
771 /* header found: update pointers */
772 len = (p + 4) - s->inbuf_ptr;
776 /* compute frame size */
777 s->free_format_next_header = header;
778 s->free_format_frame_size = s->inbuf_ptr - s->inbuf;
// Bit 9 of the first header is the padding flag; padding is worth 4 bytes in one
// branch and 1 byte in the other (the selecting condition is not visible).
779 padding = (header1 >> 9) & 1;
781 s->free_format_frame_size -= padding * 4;
783 s->free_format_frame_size -= padding;
784 dprintf("free frame size=%d padding=%d\n",
785 s->free_format_frame_size, padding);
786 decode_header(s, header1);
791 /* not found: simply increase pointers */
798 if (len < s->frame_size) {
// Clamp so that the bytes still to be gathered cannot exceed the size of inbuf.
799 if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
800 s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
801 len = s->frame_size - len;
804 memcpy(s->inbuf_ptr, buf_ptr, len);
// A complete frame is buffered: report its size and rewind the write cursor.
810 if (s->frame_size > 0 &&
811 (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
813 *poutbuf_size = s->inbuf_ptr - s->inbuf;
814 s->inbuf_ptr = s->inbuf;
819 return buf_ptr - buf;
// External declaration of the sync-info routine used by ac3_parse(); it is declared
// here directly rather than through an included header. ac3_parse() passes it the
// buffered header bytes and stores the result in `len`.
823 extern int a52_syncinfo (const uint8_t * buf, int * flags,
824 int * sample_rate, int * bit_rate);
// Private state of the AC-3 parser.
// NOTE(review): inbuf_ptr, frame_size and flags, all used by ac3_parse(), and the
// closing of the typedef are on lines missing from this view.
826 typedef struct AC3ParseContext {
827 uint8_t inbuf[4096]; /* input buffer */
// Number of bytes gathered before a52_syncinfo() is called.
833 #define AC3_HEADER_SIZE 7
// Init callback for the AC-3 parser: points the write cursor at the start of the
// accumulation buffer.
// NOTE(review): the return statement is on a line missing from this view.
836 static int ac3_parse_init(AVCodecParserContext *s1)
838 AC3ParseContext *s = s1->priv_data;
839 s->inbuf_ptr = s->inbuf;
// Parser callback for AC-3. Bytes are accumulated in s->inbuf: while s->frame_size is 0
// the first AC3_HEADER_SIZE bytes are passed to a52_syncinfo(); afterwards input is
// gathered until frame_size bytes are buffered and the frame is emitted.
// Returns the number of input bytes consumed (buf_ptr - buf).
// NOTE(review): pointer/length updates, several conditions and the assignment of
// *poutbuf are on lines missing from this view.
843 static int ac3_parse(AVCodecParserContext *s1,
844 AVCodecContext *avctx,
845 uint8_t **poutbuf, int *poutbuf_size,
846 const uint8_t *buf, int buf_size)
848 AC3ParseContext *s = s1->priv_data;
849 const uint8_t *buf_ptr;
850 int len, sample_rate, bit_rate;
// Channel count looked up with the low three bits of the a52 flags.
851 static const int ac3_channels[8] = {
852 2, 1, 2, 3, 3, 4, 4, 5
859 while (buf_size > 0) {
// len = number of bytes currently buffered.
860 len = s->inbuf_ptr - s->inbuf;
861 if (s->frame_size == 0) {
862 /* no header seen : find one. We need at least 7 bytes to parse it */
863 len = AC3_HEADER_SIZE - len;
866 memcpy(s->inbuf_ptr, buf_ptr, len);
870 if ((s->inbuf_ptr - s->inbuf) == AC3_HEADER_SIZE) {
// NOTE(review): the test on `len` that selects the resync branch below is not visible;
// presumably a zero result means "no sync" and a non-zero one is the frame length.
871 len = a52_syncinfo(s->inbuf, &s->flags, &sample_rate, &bit_rate);
873 /* no sync found : move by one byte (inefficient, but simple!) */
874 memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
878 /* update codec info */
879 avctx->sample_rate = sample_rate;
880 /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
881 if(avctx->channels!=1 && avctx->channels!=2){
882 avctx->channels = ac3_channels[s->flags & 7];
// The statement guarded by the LFE flag is not visible (presumably one extra channel).
883 if (s->flags & A52_LFE)
886 avctx->bit_rate = bit_rate;
887 avctx->frame_size = 6 * 256;
890 } else if (len < s->frame_size) {
891 len = s->frame_size - len;
// NOTE(review): inbuf is 4096 bytes and no bound on frame_size is visible before this
// copy -- confirm a52_syncinfo() cannot report a larger frame.
895 memcpy(s->inbuf_ptr, buf_ptr, len);
// Frame complete: report its size and rewind the write cursor.
901 *poutbuf_size = s->frame_size;
902 s->inbuf_ptr = s->inbuf;
907 return buf_ptr - buf;
// Parser table entry for MPEG-1/2 video, written as a positional initializer: codec id
// list, size of the private context, then callbacks. The entries between the size and
// the close callback are on lines missing from this view.
911 AVCodecParser mpegvideo_parser = {
912 { CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO },
913 sizeof(ParseContext1),
916 mpegvideo_parse_close,
// Parser table entry for MPEG-4 video. It shares ParseContext1 and the close callback
// with the MPEG-1/2 entry but supplies its own init callback.
// NOTE(review): the codec id list and the parse callback are on lines missing from
// this view.
919 AVCodecParser mpeg4video_parser = {
921 sizeof(ParseContext1),
922 mpeg4video_parse_init,
924 mpegvideo_parse_close,
// Parser table entry for H.263; shares ParseContext1 and the close callback with the
// MPEG video entries.
// NOTE(review): the codec id list and the init/parse entries are on lines missing from
// this view.
927 AVCodecParser h263_parser = {
929 sizeof(ParseContext1),
932 mpegvideo_parse_close,
// Parser table entry for H.264; shares ParseContext1 and the close callback with the
// MPEG video entries.
// NOTE(review): the codec id list and the init/parse entries are on lines missing from
// this view.
935 AVCodecParser h264_parser = {
937 sizeof(ParseContext1),
940 mpegvideo_parse_close,
// Parser table entry for MPEG audio (MP2 and MP3), using MpegAudioParseContext as its
// private context.
// NOTE(review): the parse and close entries are on lines missing from this view.
943 AVCodecParser mpegaudio_parser = {
944 { CODEC_ID_MP2, CODEC_ID_MP3 },
945 sizeof(MpegAudioParseContext),
946 mpegaudio_parse_init,
952 AVCodecParser ac3_parser = {
954 sizeof(AC3ParseContext),