git.sesse.net Git - ffmpeg/blob - libavcodec/h264_parser.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... parser
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG4 part10 parser.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "libavutil/attributes.h"
  29 #include "parser.h"
  30 #include "h264data.h"
  31 #include "golomb.h"
  32 #include "internal.h"
  33
  34 #include <assert.h>
  35
  36
  37 static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
  38                                int buf_size)
  39 {
  40     int i;
  41     uint32_t state;
  42     ParseContext *pc = &h->parse_context;
  43 //    mb_addr= pc->mb_addr - 1;
  44     state = pc->state;
  45     if (state > 13)
  46         state = 7;
  47
  48     for (i = 0; i < buf_size; i++) {
  49         if (state == 7) {
  50             i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
  51             if (i < buf_size)
  52                 state = 2;
  53         } else if (state <= 2) {
  54             if (buf[i] == 1)
  55                 state ^= 5;            // 2->7, 1->4, 0->5
  56             else if (buf[i])
  57                 state = 7;
  58             else
  59                 state >>= 1;           // 2->1, 1->0, 0->0
  60         } else if (state <= 5) {
  61             int v = buf[i] & 0x1F;
  62             if (v == 6 || v == 7 || v == 8 || v == 9) {
  63                 if (pc->frame_start_found) {
  64                     i++;
  65                     goto found;
  66                 }
  67             } else if (v == 1 || v == 2 || v == 5) {
  68                 if (pc->frame_start_found) {
  69                     state += 8;
  70                     continue;
  71                 } else
  72                     pc->frame_start_found = 1;
  73             }
  74             state = 7;
  75         } else {
  76             if (buf[i] & 0x80)
  77                 goto found;
  78             state = 7;
  79         }
  80     }
  81     pc->state = state;
  82     return END_NOT_FOUND;
  83
  84 found:
  85     pc->state             = 7;
  86     pc->frame_start_found = 0;
  87     return i - (state & 5);
  88 }
  89
  90 static int scan_mmco_reset(AVCodecParserContext *s)
  91 {
  92     H264Context *h = s->priv_data;
  93
  94     h->slice_type_nos = s->pict_type & 3;
  95
  96     if (h->pps.redundant_pic_cnt_present)
  97         get_ue_golomb(&h->gb); // redundant_pic_count
  98
  99     if (ff_set_ref_count(h) < 0)
 100         return AVERROR_INVALIDDATA;
 101
 102     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
 103         int list;
 104         for (list = 0; list < h->list_count; list++) {
 105             if (get_bits1(&h->gb)) {
 106                 int index;
 107                 for (index = 0; ; index++) {
 108                     unsigned int reordering_of_pic_nums_idc = get_ue_golomb_31(&h->gb);
 109
 110                     if (reordering_of_pic_nums_idc < 3)
 111                         get_ue_golomb(&h->gb);
 112                     else if (reordering_of_pic_nums_idc > 3) {
 113                         av_log(h->avctx, AV_LOG_ERROR,
 114                                "illegal reordering_of_pic_nums_idc %d\n",
 115                                reordering_of_pic_nums_idc);
 116                         return AVERROR_INVALIDDATA;
 117                     } else
 118                         break;
 119
 120                     if (index >= h->ref_count[list]) {
 121                         av_log(h->avctx, AV_LOG_ERROR,
 122                                "reference count %d overflow\n", index);
 123                         return AVERROR_INVALIDDATA;
 124                     }
 125                 }
 126             }
 127         }
 128     }
 129
 130     if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
 131         (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B))
 132         ff_pred_weight_table(h);
 133
 134     if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag
 135         int i;
 136         for (i = 0; i < MAX_MMCO_COUNT; i++) {
 137             MMCOOpcode opcode = get_ue_golomb_31(&h->gb);
 138             if (opcode > (unsigned) MMCO_LONG) {
 139                 av_log(h->avctx, AV_LOG_ERROR,
 140                        "illegal memory management control operation %d\n",
 141                        opcode);
 142                 return AVERROR_INVALIDDATA;
 143             }
 144             if (opcode == MMCO_END)
 145                return 0;
 146             else if (opcode == MMCO_RESET)
 147                 return 1;
 148
 149             if (opcode == MMCO_SHORT2UNUSED || opcode == MMCO_SHORT2LONG)
 150                 get_ue_golomb(&h->gb);
 151             if (opcode == MMCO_SHORT2LONG || opcode == MMCO_LONG2UNUSED ||
 152                 opcode == MMCO_LONG || opcode == MMCO_SET_MAX_LONG)
 153                 get_ue_golomb_31(&h->gb);
 154         }
 155     }
 156
 157     return 0;
 158 }
 159
 160 /**
 161  * Parse NAL units of found picture and decode some basic information.
 162  *
 163  * @param s parser context.
 164  * @param avctx codec context.
 165  * @param buf buffer with field/frame data.
 166  * @param buf_size size of the buffer.
 167  */
 168 static inline int parse_nal_units(AVCodecParserContext *s,
 169                                   AVCodecContext *avctx,
 170                                   const uint8_t *buf, int buf_size)
 171 {
 172     H264Context *h         = s->priv_data;
 173     const uint8_t *buf_end = buf + buf_size;
 174     unsigned int pps_id;
 175     unsigned int slice_type;
 176     int state = -1, got_reset = 0;
 177     const uint8_t *ptr;
 178     int field_poc[2];
 179
 180     /* set some sane default values */
 181     s->pict_type         = AV_PICTURE_TYPE_I;
 182     s->key_frame         = 0;
 183     s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
 184
 185     h->avctx = avctx;
 186     ff_h264_reset_sei(h);
 187
 188     if (!buf_size)
 189         return 0;
 190
 191     for (;;) {
 192         int src_length, dst_length, consumed;
 193         buf = avpriv_find_start_code(buf, buf_end, &state);
 194         if (buf >= buf_end)
 195             break;
 196         --buf;
 197         src_length = buf_end - buf;
 198         switch (state & 0x1f) {
 199         case NAL_SLICE:
 200         case NAL_IDR_SLICE:
 201             // Do not walk the whole buffer just to decode slice header
 202             if ((state & 0x1f) == NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
 203                 /* IDR or disposable slice
 204                  * No need to decode many bytes because MMCOs shall not be present. */
 205                 if (src_length > 60)
 206                     src_length = 60;
 207             } else {
 208                 /* To decode up to MMCOs */
 209                 if (src_length > 1000)
 210                     src_length = 1000;
 211             }
 212             break;
 213         }
 214         ptr = ff_h264_decode_nal(h, buf, &dst_length, &consumed, src_length);
 215         if (ptr == NULL || dst_length < 0)
 216             break;
 217
 218         init_get_bits(&h->gb, ptr, 8 * dst_length);
 219         switch (h->nal_unit_type) {
 220         case NAL_SPS:
 221             ff_h264_decode_seq_parameter_set(h);
 222             break;
 223         case NAL_PPS:
 224             ff_h264_decode_picture_parameter_set(h, h->gb.size_in_bits);
 225             break;
 226         case NAL_SEI:
 227             ff_h264_decode_sei(h);
 228             break;
 229         case NAL_IDR_SLICE:
 230             s->key_frame = 1;
 231
 232             h->prev_frame_num        = 0;
 233             h->prev_frame_num_offset = 0;
 234             h->prev_poc_msb          =
 235             h->prev_poc_lsb          = 0;
 236         /* fall through */
 237         case NAL_SLICE:
 238             get_ue_golomb(&h->gb);  // skip first_mb_in_slice
 239             slice_type   = get_ue_golomb_31(&h->gb);
 240             s->pict_type = golomb_to_pict_type[slice_type % 5];
 241             if (h->sei_recovery_frame_cnt >= 0) {
 242                 /* key frame, since recovery_frame_cnt is set */
 243                 s->key_frame = 1;
 244             }
 245             pps_id = get_ue_golomb(&h->gb);
 246             if (pps_id >= MAX_PPS_COUNT) {
 247                 av_log(h->avctx, AV_LOG_ERROR,
 248                        "pps_id %u out of range\n", pps_id);
 249                 return -1;
 250             }
 251             if (!h->pps_buffers[pps_id]) {
 252                 av_log(h->avctx, AV_LOG_ERROR,
 253                        "non-existing PPS %u referenced\n", pps_id);
 254                 return -1;
 255             }
 256             h->pps = *h->pps_buffers[pps_id];
 257             if (!h->sps_buffers[h->pps.sps_id]) {
 258                 av_log(h->avctx, AV_LOG_ERROR,
 259                        "non-existing SPS %u referenced\n", h->pps.sps_id);
 260                 return -1;
 261             }
 262             h->sps       = *h->sps_buffers[h->pps.sps_id];
 263             h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
 264
 265             avctx->profile = ff_h264_get_profile(&h->sps);
 266             avctx->level   = h->sps.level_idc;
 267
 268             if (h->sps.frame_mbs_only_flag) {
 269                 h->picture_structure = PICT_FRAME;
 270             } else {
 271                 if (get_bits1(&h->gb)) { // field_pic_flag
 272                     h->picture_structure = PICT_TOP_FIELD + get_bits1(&h->gb); // bottom_field_flag
 273                 } else {
 274                     h->picture_structure = PICT_FRAME;
 275                 }
 276             }
 277
 278             if (h->nal_unit_type == NAL_IDR_SLICE)
 279                 get_ue_golomb(&h->gb); /* idr_pic_id */
 280             if (h->sps.poc_type == 0) {
 281                 h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb);
 282
 283                 if (h->pps.pic_order_present == 1 &&
 284                     h->picture_structure == PICT_FRAME)
 285                     h->delta_poc_bottom = get_se_golomb(&h->gb);
 286             }
 287
 288             if (h->sps.poc_type == 1 &&
 289                 !h->sps.delta_pic_order_always_zero_flag) {
 290                 h->delta_poc[0] = get_se_golomb(&h->gb);
 291
 292                 if (h->pps.pic_order_present == 1 &&
 293                     h->picture_structure == PICT_FRAME)
 294                     h->delta_poc[1] = get_se_golomb(&h->gb);
 295             }
 296
 297             /* Decode POC of this picture.
 298              * The prev_ values needed for decoding POC of the next picture are not set here. */
 299             field_poc[0] = field_poc[1] = INT_MAX;
 300             ff_init_poc(h, field_poc, &s->output_picture_number);
 301
 302             /* Continue parsing to check if MMCO_RESET is present.
 303              * FIXME: MMCO_RESET could appear in non-first slice.
 304              *        Maybe, we should parse all undisposable non-IDR slice of this
 305              *        picture until encountering MMCO_RESET in a slice of it. */
 306             if (h->nal_ref_idc && h->nal_unit_type != NAL_IDR_SLICE) {
 307                 got_reset = scan_mmco_reset(s);
 308                 if (got_reset < 0)
 309                     return got_reset;
 310             }
 311
 312             /* Set up the prev_ values for decoding POC of the next picture. */
 313             h->prev_frame_num        = got_reset ? 0 : h->frame_num;
 314             h->prev_frame_num_offset = got_reset ? 0 : h->frame_num_offset;
 315             if (h->nal_ref_idc != 0) {
 316                 if (!got_reset) {
 317                     h->prev_poc_msb = h->poc_msb;
 318                     h->prev_poc_lsb = h->poc_lsb;
 319                 } else {
 320                     h->prev_poc_msb = 0;
 321                     h->prev_poc_lsb =
 322                         h->picture_structure == PICT_BOTTOM_FIELD ? 0 : field_poc[0];
 323                 }
 324             }
 325
 326             if (h->sps.pic_struct_present_flag) {
 327                 switch (h->sei_pic_struct) {
 328                 case SEI_PIC_STRUCT_TOP_FIELD:
 329                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
 330                     s->repeat_pict = 0;
 331                     break;
 332                 case SEI_PIC_STRUCT_FRAME:
 333                 case SEI_PIC_STRUCT_TOP_BOTTOM:
 334                 case SEI_PIC_STRUCT_BOTTOM_TOP:
 335                     s->repeat_pict = 1;
 336                     break;
 337                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
 338                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
 339                     s->repeat_pict = 2;
 340                     break;
 341                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
 342                     s->repeat_pict = 3;
 343                     break;
 344                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
 345                     s->repeat_pict = 5;
 346                     break;
 347                 default:
 348                     s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
 349                     break;
 350                 }
 351             } else {
 352                 s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
 353             }
 354
 355             if (h->picture_structure == PICT_FRAME) {
 356                 s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
 357                 if (h->sps.pic_struct_present_flag) {
 358                     switch (h->sei_pic_struct) {
 359                     case SEI_PIC_STRUCT_TOP_BOTTOM:
 360                     case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
 361                         s->field_order = AV_FIELD_TT;
 362                         break;
 363                     case SEI_PIC_STRUCT_BOTTOM_TOP:
 364                     case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
 365                         s->field_order = AV_FIELD_BB;
 366                         break;
 367                     default:
 368                         s->field_order = AV_FIELD_PROGRESSIVE;
 369                         break;
 370                     }
 371                 } else {
 372                     if (field_poc[0] < field_poc[1])
 373                         s->field_order = AV_FIELD_TT;
 374                     else if (field_poc[0] > field_poc[1])
 375                         s->field_order = AV_FIELD_BB;
 376                     else
 377                         s->field_order = AV_FIELD_PROGRESSIVE;
 378                 }
 379             } else {
 380                 if (h->picture_structure == PICT_TOP_FIELD)
 381                     s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
 382                 else
 383                     s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
 384                 s->field_order = AV_FIELD_UNKNOWN;
 385             }
 386
 387             return 0; /* no need to evaluate the rest */
 388         }
 389         buf += consumed;
 390     }
 391     /* didn't find a picture! */
 392     av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
 393     return -1;
 394 }
 395
 396 static int h264_parse(AVCodecParserContext *s,
 397                       AVCodecContext *avctx,
 398                       const uint8_t **poutbuf, int *poutbuf_size,
 399                       const uint8_t *buf, int buf_size)
 400 {
 401     H264Context *h   = s->priv_data;
 402     ParseContext *pc = &h->parse_context;
 403     int next;
 404
 405     if (!h->got_first) {
 406         h->got_first = 1;
 407         if (avctx->extradata_size) {
 408             h->avctx = avctx;
 409             // must be done like in the decoder.
 410             // otherwise opening the parser, creating extradata,
 411             // and then closing and opening again
 412             // will cause has_b_frames to be always set.
 413             // NB: estimate_timings_from_pts behaves exactly like this.
 414             if (!avctx->has_b_frames)
 415                 h->low_delay = 1;
 416             ff_h264_decode_extradata(h);
 417         }
 418     }
 419
 420     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
 421         next = buf_size;
 422     } else {
 423         next = h264_find_frame_end(h, buf, buf_size);
 424
 425         if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
 426             *poutbuf      = NULL;
 427             *poutbuf_size = 0;
 428             return buf_size;
 429         }
 430
 431         if (next < 0 && next != END_NOT_FOUND) {
 432             assert(pc->last_index + next >= 0);
 433             h264_find_frame_end(h, &pc->buffer[pc->last_index + next], -next); // update state
 434         }
 435     }
 436
 437     parse_nal_units(s, avctx, buf, buf_size);
 438
 439     if (h->sei_cpb_removal_delay >= 0) {
 440         s->dts_sync_point    = h->sei_buffering_period_present;
 441         s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
 442         s->pts_dts_delta     = h->sei_dpb_output_delay;
 443     } else {
 444         s->dts_sync_point    = INT_MIN;
 445         s->dts_ref_dts_delta = INT_MIN;
 446         s->pts_dts_delta     = INT_MIN;
 447     }
 448
 449     if (s->flags & PARSER_FLAG_ONCE) {
 450         s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
 451     }
 452
 453     *poutbuf      = buf;
 454     *poutbuf_size = buf_size;
 455     return next;
 456 }
 457
 458 static int h264_split(AVCodecContext *avctx,
 459                       const uint8_t *buf, int buf_size)
 460 {
 461     int i;
 462     uint32_t state = -1;
 463     int has_sps    = 0;
 464
 465     for (i = 0; i <= buf_size; i++) {
 466         if ((state & 0xFFFFFF1F) == 0x107)
 467             has_sps = 1;
 468         /*  if((state&0xFFFFFF1F) == 0x101 ||
 469          *     (state&0xFFFFFF1F) == 0x102 ||
 470          *     (state&0xFFFFFF1F) == 0x105) {
 471          *  }
 472          */
 473         if ((state & 0xFFFFFF00) == 0x100 && (state & 0xFFFFFF1F) != 0x107 &&
 474             (state & 0xFFFFFF1F) != 0x108 && (state & 0xFFFFFF1F) != 0x109) {
 475             if (has_sps) {
 476                 while (i > 4 && buf[i - 5] == 0)
 477                     i--;
 478                 return i - 4;
 479             }
 480         }
 481         if (i < buf_size)
 482             state = (state << 8) | buf[i];
 483     }
 484     return 0;
 485 }
 486
 487 static void close(AVCodecParserContext *s)
 488 {
 489     H264Context *h   = s->priv_data;
 490     ParseContext *pc = &h->parse_context;
 491
 492     av_free(pc->buffer);
 493     ff_h264_free_context(h);
 494 }
 495
 496 static av_cold int init(AVCodecParserContext *s)
 497 {
 498     H264Context *h = s->priv_data;
 499     h->thread_context[0]   = h;
 500     h->slice_context_count = 1;
 501     ff_h264dsp_init(&h->h264dsp, 8, 1);
 502     return 0;
 503 }
 504
 505 AVCodecParser ff_h264_parser = {
 506     .codec_ids      = { AV_CODEC_ID_H264 },
 507     .priv_data_size = sizeof(H264Context),
 508     .parser_init    = init,
 509     .parser_parse   = h264_parse,
 510     .parser_close   = close,
 511     .split          = h264_split,
 512 };