]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_parser.c
avcodec: Add output_picture_number to AVCodecParserContext
[ffmpeg] / libavcodec / h264_parser.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... parser
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 parser.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #include "libavutil/attributes.h"
29 #include "parser.h"
30 #include "h264data.h"
31 #include "golomb.h"
32 #include "internal.h"
33
34 #include <assert.h>
35
36
37 static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
38                                int buf_size)
39 {
40     int i;
41     uint32_t state;
42     ParseContext *pc = &h->parse_context;
43 //    mb_addr= pc->mb_addr - 1;
44     state = pc->state;
45     if (state > 13)
46         state = 7;
47
48     for (i = 0; i < buf_size; i++) {
49         if (state == 7) {
50 #if HAVE_FAST_UNALIGNED
51             /* we check i < buf_size instead of i + 3 / 7 because it is
52              * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
53              * bytes at the end.
54              */
55 #if HAVE_FAST_64BIT
56             while (i < buf_size &&
57                    !((~*(const uint64_t *)(buf + i) &
58                       (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
59                       0x8080808080808080ULL))
60                 i += 8;
61 #else
62             while (i < buf_size &&
63                    !((~*(const uint32_t *)(buf + i) &
64                       (*(const uint32_t *)(buf + i) - 0x01010101U)) &
65                       0x80808080U))
66                 i += 4;
67 #endif
68 #endif
69             for (; i < buf_size; i++)
70                 if (!buf[i]) {
71                     state = 2;
72                     break;
73                 }
74         } else if (state <= 2) {
75             if (buf[i] == 1)
76                 state ^= 5;            // 2->7, 1->4, 0->5
77             else if (buf[i])
78                 state = 7;
79             else
80                 state >>= 1;           // 2->1, 1->0, 0->0
81         } else if (state <= 5) {
82             int v = buf[i] & 0x1F;
83             if (v == 6 || v == 7 || v == 8 || v == 9) {
84                 if (pc->frame_start_found) {
85                     i++;
86                     goto found;
87                 }
88             } else if (v == 1 || v == 2 || v == 5) {
89                 if (pc->frame_start_found) {
90                     state += 8;
91                     continue;
92                 } else
93                     pc->frame_start_found = 1;
94             }
95             state = 7;
96         } else {
97             if (buf[i] & 0x80)
98                 goto found;
99             state = 7;
100         }
101     }
102     pc->state = state;
103     return END_NOT_FOUND;
104
105 found:
106     pc->state             = 7;
107     pc->frame_start_found = 0;
108     return i - (state & 5);
109 }
110
111 /**
112  * Parse NAL units of found picture and decode some basic information.
113  *
114  * @param s parser context.
115  * @param avctx codec context.
116  * @param buf buffer with field/frame data.
117  * @param buf_size size of the buffer.
118  */
119 static inline int parse_nal_units(AVCodecParserContext *s,
120                                   AVCodecContext *avctx,
121                                   const uint8_t *buf, int buf_size)
122 {
123     H264Context *h         = s->priv_data;
124     const uint8_t *buf_end = buf + buf_size;
125     unsigned int pps_id;
126     unsigned int slice_type;
127     int state = -1;
128     const uint8_t *ptr;
129     int field_poc[2];
130
131     /* set some sane default values */
132     s->pict_type         = AV_PICTURE_TYPE_I;
133     s->key_frame         = 0;
134     s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
135
136     h->avctx                        = avctx;
137     h->sei_recovery_frame_cnt       = -1;
138     h->sei_dpb_output_delay         = 0;
139     h->sei_cpb_removal_delay        = -1;
140     h->sei_buffering_period_present = 0;
141
142     if (!buf_size)
143         return 0;
144
145     for (;;) {
146         int src_length, dst_length, consumed;
147         buf = avpriv_find_start_code(buf, buf_end, &state);
148         if (buf >= buf_end)
149             break;
150         --buf;
151         src_length = buf_end - buf;
152         switch (state & 0x1f) {
153         case NAL_SLICE:
154         case NAL_IDR_SLICE:
155             // Do not walk the whole buffer just to decode slice header
156             if (src_length > 20)
157                 src_length = 20;
158             break;
159         }
160         ptr = ff_h264_decode_nal(h, buf, &dst_length, &consumed, src_length);
161         if (ptr == NULL || dst_length < 0)
162             break;
163
164         init_get_bits(&h->gb, ptr, 8 * dst_length);
165         switch (h->nal_unit_type) {
166         case NAL_SPS:
167             ff_h264_decode_seq_parameter_set(h);
168             break;
169         case NAL_PPS:
170             ff_h264_decode_picture_parameter_set(h, h->gb.size_in_bits);
171             break;
172         case NAL_SEI:
173             ff_h264_decode_sei(h);
174             break;
175         case NAL_IDR_SLICE:
176             s->key_frame = 1;
177
178             h->prev_frame_num        = 0;
179             h->prev_frame_num_offset = 0;
180             h->prev_poc_msb          =
181             h->prev_poc_lsb          = 0;
182         /* fall through */
183         case NAL_SLICE:
184             get_ue_golomb(&h->gb);  // skip first_mb_in_slice
185             slice_type   = get_ue_golomb_31(&h->gb);
186             s->pict_type = golomb_to_pict_type[slice_type % 5];
187             if (h->sei_recovery_frame_cnt >= 0) {
188                 /* key frame, since recovery_frame_cnt is set */
189                 s->key_frame = 1;
190             }
191             pps_id = get_ue_golomb(&h->gb);
192             if (pps_id >= MAX_PPS_COUNT) {
193                 av_log(h->avctx, AV_LOG_ERROR,
194                        "pps_id out of range\n");
195                 return -1;
196             }
197             if (!h->pps_buffers[pps_id]) {
198                 av_log(h->avctx, AV_LOG_ERROR,
199                        "non-existing PPS referenced\n");
200                 return -1;
201             }
202             h->pps = *h->pps_buffers[pps_id];
203             if (!h->sps_buffers[h->pps.sps_id]) {
204                 av_log(h->avctx, AV_LOG_ERROR,
205                        "non-existing SPS referenced\n");
206                 return -1;
207             }
208             h->sps       = *h->sps_buffers[h->pps.sps_id];
209             h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
210
211             avctx->profile = ff_h264_get_profile(&h->sps);
212             avctx->level   = h->sps.level_idc;
213
214             if (h->sps.frame_mbs_only_flag) {
215                 h->picture_structure = PICT_FRAME;
216             } else {
217                 if (get_bits1(&h->gb)) { // field_pic_flag
218                     h->picture_structure = PICT_TOP_FIELD + get_bits1(&h->gb); // bottom_field_flag
219                 } else {
220                     h->picture_structure = PICT_FRAME;
221                 }
222             }
223
224             if (h->nal_unit_type == NAL_IDR_SLICE)
225                 get_ue_golomb(&h->gb); /* idr_pic_id */
226             if (h->sps.poc_type == 0) {
227                 h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb);
228
229                 if (h->pps.pic_order_present == 1 &&
230                     h->picture_structure == PICT_FRAME)
231                     h->delta_poc_bottom = get_se_golomb(&h->gb);
232             }
233
234             if (h->sps.poc_type == 1 &&
235                 !h->sps.delta_pic_order_always_zero_flag) {
236                 h->delta_poc[0] = get_se_golomb(&h->gb);
237
238                 if (h->pps.pic_order_present == 1 &&
239                     h->picture_structure == PICT_FRAME)
240                     h->delta_poc[1] = get_se_golomb(&h->gb);
241             }
242
243             ff_init_poc(h, field_poc, &s->output_picture_number);
244
245             if (h->sps.pic_struct_present_flag) {
246                 switch (h->sei_pic_struct) {
247                 case SEI_PIC_STRUCT_TOP_FIELD:
248                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
249                     s->repeat_pict = 0;
250                     break;
251                 case SEI_PIC_STRUCT_FRAME:
252                 case SEI_PIC_STRUCT_TOP_BOTTOM:
253                 case SEI_PIC_STRUCT_BOTTOM_TOP:
254                     s->repeat_pict = 1;
255                     break;
256                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
257                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
258                     s->repeat_pict = 2;
259                     break;
260                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
261                     s->repeat_pict = 3;
262                     break;
263                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
264                     s->repeat_pict = 5;
265                     break;
266                 default:
267                     s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
268                     break;
269                 }
270             } else {
271                 s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
272             }
273
274             if (h->picture_structure == PICT_FRAME) {
275                 s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
276                 if (h->sps.pic_struct_present_flag) {
277                     switch (h->sei_pic_struct) {
278                     case SEI_PIC_STRUCT_TOP_BOTTOM:
279                     case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
280                         s->field_order = AV_FIELD_TT;
281                         break;
282                     case SEI_PIC_STRUCT_BOTTOM_TOP:
283                     case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
284                         s->field_order = AV_FIELD_BB;
285                         break;
286                     default:
287                         s->field_order = AV_FIELD_PROGRESSIVE;
288                         break;
289                     }
290                 } else {
291                     if (field_poc[0] < field_poc[1])
292                         s->field_order = AV_FIELD_TT;
293                     else if (field_poc[0] > field_poc[1])
294                         s->field_order = AV_FIELD_BB;
295                     else
296                         s->field_order = AV_FIELD_PROGRESSIVE;
297                 }
298             } else {
299                 if (h->picture_structure == PICT_TOP_FIELD)
300                     s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
301                 else
302                     s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
303                 s->field_order = AV_FIELD_UNKNOWN;
304             }
305
306             return 0; /* no need to evaluate the rest */
307         }
308         buf += consumed;
309     }
310     /* didn't find a picture! */
311     av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
312     return -1;
313 }
314
315 static int h264_parse(AVCodecParserContext *s,
316                       AVCodecContext *avctx,
317                       const uint8_t **poutbuf, int *poutbuf_size,
318                       const uint8_t *buf, int buf_size)
319 {
320     H264Context *h   = s->priv_data;
321     ParseContext *pc = &h->parse_context;
322     int next;
323
324     if (!h->got_first) {
325         h->got_first = 1;
326         if (avctx->extradata_size) {
327             h->avctx = avctx;
328             // must be done like in the decoder.
329             // otherwise opening the parser, creating extradata,
330             // and then closing and opening again
331             // will cause has_b_frames to be always set.
332             // NB: estimate_timings_from_pts behaves exactly like this.
333             if (!avctx->has_b_frames)
334                 h->low_delay = 1;
335             ff_h264_decode_extradata(h);
336         }
337     }
338
339     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
340         next = buf_size;
341     } else {
342         next = h264_find_frame_end(h, buf, buf_size);
343
344         if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
345             *poutbuf      = NULL;
346             *poutbuf_size = 0;
347             return buf_size;
348         }
349
350         if (next < 0 && next != END_NOT_FOUND) {
351             assert(pc->last_index + next >= 0);
352             h264_find_frame_end(h, &pc->buffer[pc->last_index + next], -next); // update state
353         }
354     }
355
356     parse_nal_units(s, avctx, buf, buf_size);
357
358     if (h->sei_cpb_removal_delay >= 0) {
359         s->dts_sync_point    = h->sei_buffering_period_present;
360         s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
361         s->pts_dts_delta     = h->sei_dpb_output_delay;
362     } else {
363         s->dts_sync_point    = INT_MIN;
364         s->dts_ref_dts_delta = INT_MIN;
365         s->pts_dts_delta     = INT_MIN;
366     }
367
368     if (s->flags & PARSER_FLAG_ONCE) {
369         s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
370     }
371
372     *poutbuf      = buf;
373     *poutbuf_size = buf_size;
374     return next;
375 }
376
/**
 * Find the point where the leading parameter sets end.
 *
 * Scans for 00 00 01 start codes. Once a NAL unit of type 7 (SPS) has been
 * seen, the first following NAL unit whose type is not 7, 8 or 9 marks the
 * split point; zero bytes directly preceding its start code are counted as
 * part of that start code.
 *
 * @param avctx    codec context (unused).
 * @param buf      input data.
 * @param buf_size size of the input data.
 * @return offset of the split point, or 0 if none was found.
 */
static int h264_split(AVCodecContext *avctx,
                      const uint8_t *buf, int buf_size)
{
    uint32_t window = -1;   /* the last four bytes read, newest in the LSB */
    int sps_seen    = 0;
    int pos;

    /* pos runs up to buf_size inclusive so that a NAL header sitting in
     * the very last byte is still examined */
    for (pos = 0; pos <= buf_size; pos++) {
        /* window == 00 00 01 xx: xx is a NAL header byte */
        if ((window >> 8) == 1) {
            const unsigned nal_type = window & 0x1F;

            if (nal_type == 7) {
                sps_seen = 1;
            } else if (nal_type != 8 && nal_type != 9 && sps_seen) {
                /* include leading zero bytes of the start code */
                while (pos > 4 && buf[pos - 5] == 0)
                    pos--;
                return pos - 4;
            }
        }
        if (pos < buf_size)
            window = (window << 8) | buf[pos];
    }
    return 0;
}
405
406 static void close(AVCodecParserContext *s)
407 {
408     H264Context *h   = s->priv_data;
409     ParseContext *pc = &h->parse_context;
410
411     av_free(pc->buffer);
412     ff_h264_free_context(h);
413 }
414
415 static av_cold int init(AVCodecParserContext *s)
416 {
417     H264Context *h = s->priv_data;
418     h->thread_context[0]   = h;
419     h->slice_context_count = 1;
420     return 0;
421 }
422
423 AVCodecParser ff_h264_parser = {
424     .codec_ids      = { AV_CODEC_ID_H264 },
425     .priv_data_size = sizeof(H264Context),
426     .parser_init    = init,
427     .parser_parse   = h264_parse,
428     .parser_close   = close,
429     .split          = h264_split,
430 };