git.sesse.net Git - ffmpeg/blob - libavcodec/h264_parser.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... parser
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG4 part10 parser.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include <assert.h>
  29 #include <stdint.h>
  30
  31 #include "libavutil/avutil.h"
  32 #include "libavutil/error.h"
  33 #include "libavutil/log.h"
  34 #include "libavutil/mem.h"
  35 #include "libavutil/pixfmt.h"
  36
  37 #include "get_bits.h"
  38 #include "golomb.h"
  39 #include "h264.h"
  40 #include "h264data.h"
  41 #include "internal.h"
  42 #include "mpegutils.h"
  43 #include "parser.h"
  44
/**
 * Private data of the H.264 parser (AVCodecParserContext.priv_data).
 */
typedef struct H264ParseContext {
    H264Context h;      ///< holds the SPS/PPS/SEI and slice-header fields decoded by the parser
    ParseContext pc;    ///< frame reassembly state: start-code scanner state and buffered data
    int got_first;      ///< set on the first h264_parse() call so that extradata is decoded only once
} H264ParseContext;
  50
  51
/**
 * Scan a chunk of the byte stream for the start code that begins the next
 * access unit, i.e. the end of the current frame.
 *
 * The scanner is a small state machine. Its state is stored in p->pc.state
 * between calls, so a start code may be split across two input buffers:
 *  - 7:     not inside a start code, searching for the next candidate
 *  - 2/1/0: inside a possible start code, counting zero bytes
 *           (a zero byte moves 2->1->0->0, see the inline comments)
 *  - 4/5:   the previous byte completed a start code (reached from state
 *           1 / 0 respectively); the current byte is the NAL header
 *  - 12/13: state 4/5 plus 8: the NAL header announced a slice while a
 *           frame had already been started; the current byte is the first
 *           byte of that slice's header
 *
 * @param p        parser context; p->pc.state and p->pc.frame_start_found
 *                 are read and updated
 * @param buf      input data
 * @param buf_size number of bytes available in buf
 * @return offset, relative to buf, of the first byte of the start code that
 *         begins the next access unit (this can be negative when the start
 *         code began in data passed to an earlier call), or END_NOT_FOUND
 *         if no frame end lies within this buffer
 */
static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf,
                               int buf_size)
{
    H264Context *h = &p->h;
    int i;
    uint32_t state;
    ParseContext *pc = &p->pc;
    state = pc->state;
    /* any stored value outside the range used below is treated as "searching" */
    if (state > 13)
        state = 7;

    for (i = 0; i < buf_size; i++) {
        if (state == 7) {
            /* skip ahead to the next start code candidate
             * NOTE(review): presumably the helper returns the distance to the
             * next zero byte, or the remaining size if there is none -- confirm */
            i += h->h264dsp.startcode_find_candidate(buf + i, buf_size - i);
            if (i < buf_size)
                state = 2;
        } else if (state <= 2) {
            if (buf[i] == 1)
                state ^= 5;            // 2->7, 1->4, 0->5
            else if (buf[i])
                state = 7;
            else
                state >>= 1;           // 2->1, 1->0, 0->0
        } else if (state <= 5) {
            /* buf[i] is the NAL header byte following a start code */
            int nalu_type = buf[i] & 0x1F;
            if (nalu_type == NAL_SEI || nalu_type == NAL_SPS ||
                nalu_type == NAL_PPS || nalu_type == NAL_AUD) {
                /* a non-slice NAL after a started frame begins the next access unit */
                if (pc->frame_start_found) {
                    i++;
                    goto found;
                }
            } else if (nalu_type == NAL_SLICE || nalu_type == NAL_DPA ||
                       nalu_type == NAL_IDR_SLICE) {
                if (pc->frame_start_found) {
                    /* a frame is already in progress: inspect the next byte
                     * (state 12/13) to decide whether this slice starts a new one */
                    state += 8;
                    continue;
                } else
                    pc->frame_start_found = 1;
            }
            state = 7;
        } else {
            // first_mb_in_slice is 0, probably the first nal of a new slice
            if (buf[i] & 0x80)
                goto found;
            state = 7;
        }
    }
    /* buffer exhausted: remember where we are for the next call */
    pc->state = state;
    return END_NOT_FOUND;

found:
    pc->state             = 7;
    pc->frame_start_found = 0;
    /* state is 4, 5, 12 or 13 here, so (state & 5) is 4 or 5: the number of
     * bytes to step back from i to reach the first byte of the start code */
    return i - (state & 5);
}
 108
 109 static int scan_mmco_reset(AVCodecParserContext *s)
 110 {
 111     H264ParseContext *p = s->priv_data;
 112     H264Context      *h = &p->h;
 113     H264SliceContext *sl = &h->slice_ctx[0];
 114
 115     sl->slice_type_nos = s->pict_type & 3;
 116
 117     if (h->pps.redundant_pic_cnt_present)
 118         get_ue_golomb(&sl->gb); // redundant_pic_count
 119
 120     if (sl->slice_type_nos == AV_PICTURE_TYPE_B)
 121         get_bits1(&sl->gb); // direct_spatial_mv_pred
 122
 123     if (ff_set_ref_count(h, sl) < 0)
 124         return AVERROR_INVALIDDATA;
 125
 126     if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
 127         int list;
 128         for (list = 0; list < sl->list_count; list++) {
 129             if (get_bits1(&sl->gb)) {
 130                 int index;
 131                 for (index = 0; ; index++) {
 132                     unsigned int reordering_of_pic_nums_idc = get_ue_golomb_31(&sl->gb);
 133
 134                     if (reordering_of_pic_nums_idc < 3)
 135                         get_ue_golomb(&sl->gb);
 136                     else if (reordering_of_pic_nums_idc > 3) {
 137                         av_log(h->avctx, AV_LOG_ERROR,
 138                                "illegal reordering_of_pic_nums_idc %d\n",
 139                                reordering_of_pic_nums_idc);
 140                         return AVERROR_INVALIDDATA;
 141                     } else
 142                         break;
 143
 144                     if (index >= sl->ref_count[list]) {
 145                         av_log(h->avctx, AV_LOG_ERROR,
 146                                "reference count %d overflow\n", index);
 147                         return AVERROR_INVALIDDATA;
 148                     }
 149                 }
 150             }
 151         }
 152     }
 153
 154     if ((h->pps.weighted_pred && sl->slice_type_nos == AV_PICTURE_TYPE_P) ||
 155         (h->pps.weighted_bipred_idc == 1 && sl->slice_type_nos == AV_PICTURE_TYPE_B))
 156         ff_h264_pred_weight_table(&sl->gb, &h->sps, sl->ref_count, sl->slice_type_nos,
 157                                   &sl->pwt);
 158
 159     if (get_bits1(&sl->gb)) { // adaptive_ref_pic_marking_mode_flag
 160         int i;
 161         for (i = 0; i < MAX_MMCO_COUNT; i++) {
 162             MMCOOpcode opcode = get_ue_golomb_31(&sl->gb);
 163             if (opcode > (unsigned) MMCO_LONG) {
 164                 av_log(h->avctx, AV_LOG_ERROR,
 165                        "illegal memory management control operation %d\n",
 166                        opcode);
 167                 return AVERROR_INVALIDDATA;
 168             }
 169             if (opcode == MMCO_END)
 170                return 0;
 171             else if (opcode == MMCO_RESET)
 172                 return 1;
 173
 174             if (opcode == MMCO_SHORT2UNUSED || opcode == MMCO_SHORT2LONG)
 175                 get_ue_golomb(&sl->gb);
 176             if (opcode == MMCO_SHORT2LONG || opcode == MMCO_LONG2UNUSED ||
 177                 opcode == MMCO_LONG || opcode == MMCO_SET_MAX_LONG)
 178                 get_ue_golomb_31(&sl->gb);
 179         }
 180     }
 181
 182     return 0;
 183 }
 184
/**
 * Parse NAL units of found picture and decode some basic information.
 *
 * The NAL units in buf are visited in order. SPS, PPS and SEI units are
 * decoded into the H264Context. The header of the first slice NAL unit
 * (NAL_SLICE or NAL_IDR_SLICE) is then read to fill in the picture
 * properties exported through s (pict_type, key_frame, dimensions, format,
 * picture_structure, field_order, repeat_pict, output_picture_number) and
 * avctx->profile / avctx->level. Parsing stops after that first slice.
 *
 * @param s parser context.
 * @param avctx codec context.
 * @param buf buffer with field/frame data.
 * @param buf_size size of the buffer.
 * @return 0 if buf_size is 0 or a slice header was parsed,
 *         -1 if no slice was found or its header could not be parsed.
 */
static inline int parse_nal_units(AVCodecParserContext *s,
                                  AVCodecContext *avctx,
                                  const uint8_t *buf, int buf_size)
{
    H264ParseContext *p = s->priv_data;
    H264Context      *h = &p->h;
    H264SliceContext *sl = &h->slice_ctx[0];
    const uint8_t *buf_end = buf + buf_size;

    /* nal.rbsp_buffer is released with av_freep() on every exit path below */
    H2645NAL nal = { NULL };

    unsigned int pps_id;
    unsigned int slice_type;
    int state = -1, got_reset = 0;
    int field_poc[2];
    int ret;

    /* set some sane default values */
    s->pict_type         = AV_PICTURE_TYPE_I;
    s->key_frame         = 0;
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;

    h->avctx = avctx;
    ff_h264_reset_sei(h);

    if (!buf_size)
        return 0;

    for (;;) {
        int src_length, consumed;
        buf = avpriv_find_start_code(buf, buf_end, &state);
        if (buf >= buf_end)
            break;
        /* NOTE(review): presumably steps back onto the NAL header byte, whose
         * value is also the low byte of state -- confirm against
         * avpriv_find_start_code() */
        --buf;
        src_length = buf_end - buf;
        switch (state & 0x1f) {
        case NAL_SLICE:
        case NAL_IDR_SLICE:
            // Do not walk the whole buffer just to decode slice header
            if ((state & 0x1f) == NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
                /* IDR or disposable slice
                 * No need to decode many bytes because MMCOs shall not be present. */
                if (src_length > 60)
                    src_length = 60;
            } else {
                /* To decode up to MMCOs */
                if (src_length > 1000)
                    src_length = 1000;
            }
            break;
        }

        consumed = ff_h2645_extract_rbsp(buf, src_length, &nal);
        if (consumed < 0)
            break;

        ret = init_get_bits(&nal.gb, nal.data, nal.size * 8);
        if (ret < 0)
            goto fail;
        /* NAL header: 1 bit (skipped), 2 bits nal_ref_idc, 5 bits nal_unit_type */
        get_bits1(&nal.gb);
        nal.ref_idc = get_bits(&nal.gb, 2);
        nal.type    = get_bits(&nal.gb, 5);

        h->gb            = nal.gb;
        h->nal_ref_idc   = nal.ref_idc;
        h->nal_unit_type = nal.type;

        switch (h->nal_unit_type) {
        /* return values of the parameter set / SEI decoders are not checked here */
        case NAL_SPS:
            ff_h264_decode_seq_parameter_set(h);
            break;
        case NAL_PPS:
            ff_h264_decode_picture_parameter_set(h, h->gb.size_in_bits);
            break;
        case NAL_SEI:
            ff_h264_decode_sei(h);
            break;
        case NAL_IDR_SLICE:
            s->key_frame = 1;

            /* an IDR picture restarts frame numbering and picture order count */
            h->prev_frame_num        = 0;
            h->prev_frame_num_offset = 0;
            h->prev_poc_msb          =
            h->prev_poc_lsb          = 0;
        /* fall through */
        case NAL_SLICE:
            sl->gb = nal.gb;
            get_ue_golomb(&sl->gb);  // skip first_mb_in_slice
            slice_type   = get_ue_golomb_31(&sl->gb);
            s->pict_type = ff_h264_golomb_to_pict_type[slice_type % 5];
            if (h->sei_recovery_frame_cnt >= 0) {
                /* key frame, since recovery_frame_cnt is set */
                s->key_frame = 1;
            }
            /* look up the PPS referenced by the slice, then the SPS it refers to */
            pps_id = get_ue_golomb(&sl->gb);
            if (pps_id >= MAX_PPS_COUNT) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "pps_id %u out of range\n", pps_id);
                goto fail;
            }
            if (!h->pps_buffers[pps_id]) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "non-existing PPS %u referenced\n", pps_id);
                goto fail;
            }
            h->pps = *h->pps_buffers[pps_id];
            if (!h->sps_buffers[h->pps.sps_id]) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "non-existing SPS %u referenced\n", h->pps.sps_id);
                goto fail;
            }
            h->sps       = *h->sps_buffers[h->pps.sps_id];
            h->frame_num = get_bits(&sl->gb, h->sps.log2_max_frame_num);

            /* coded size is in whole macroblocks; display size has the cropping removed */
            s->coded_width  = 16 * h->sps.mb_width;
            s->coded_height = 16 * h->sps.mb_height;
            s->width        = s->coded_width  - (h->sps.crop_right + h->sps.crop_left);
            s->height       = s->coded_height - (h->sps.crop_top   + h->sps.crop_bottom);
            if (s->width <= 0 || s->height <= 0) {
                /* cropping larger than the picture: fall back to the coded size */
                s->width  = s->coded_width;
                s->height = s->coded_height;
            }

            /* pixel format from luma bit depth and chroma subsampling */
            switch (h->sps.bit_depth_luma) {
            case 9:
                if (CHROMA444(h))      s->format = AV_PIX_FMT_YUV444P9;
                else if (CHROMA422(h)) s->format = AV_PIX_FMT_YUV422P9;
                else                   s->format = AV_PIX_FMT_YUV420P9;
                break;
            case 10:
                if (CHROMA444(h))      s->format = AV_PIX_FMT_YUV444P10;
                else if (CHROMA422(h)) s->format = AV_PIX_FMT_YUV422P10;
                else                   s->format = AV_PIX_FMT_YUV420P10;
                break;
            case 8:
                if (CHROMA444(h))      s->format = AV_PIX_FMT_YUV444P;
                else if (CHROMA422(h)) s->format = AV_PIX_FMT_YUV422P;
                else                   s->format = AV_PIX_FMT_YUV420P;
                break;
            default:
                s->format = AV_PIX_FMT_NONE;
            }

            avctx->profile = ff_h264_get_profile(&h->sps);
            avctx->level   = h->sps.level_idc;

            if (h->sps.frame_mbs_only_flag) {
                h->picture_structure = PICT_FRAME;
            } else {
                if (get_bits1(&sl->gb)) { // field_pic_flag
                    h->picture_structure = PICT_TOP_FIELD + get_bits1(&sl->gb); // bottom_field_flag
                } else {
                    h->picture_structure = PICT_FRAME;
                }
            }

            if (h->nal_unit_type == NAL_IDR_SLICE)
                get_ue_golomb(&sl->gb); /* idr_pic_id */
            if (h->sps.poc_type == 0) {
                h->poc_lsb = get_bits(&sl->gb, h->sps.log2_max_poc_lsb);

                if (h->pps.pic_order_present == 1 &&
                    h->picture_structure == PICT_FRAME)
                    h->delta_poc_bottom = get_se_golomb(&sl->gb);
            }

            if (h->sps.poc_type == 1 &&
                !h->sps.delta_pic_order_always_zero_flag) {
                h->delta_poc[0] = get_se_golomb(&sl->gb);

                if (h->pps.pic_order_present == 1 &&
                    h->picture_structure == PICT_FRAME)
                    h->delta_poc[1] = get_se_golomb(&sl->gb);
            }

            /* Decode POC of this picture.
             * The prev_ values needed for decoding POC of the next picture are not set here. */
            field_poc[0] = field_poc[1] = INT_MAX;
            ff_init_poc(h, field_poc, &s->output_picture_number);

            /* Continue parsing to check if MMCO_RESET is present.
             * FIXME: MMCO_RESET could appear in non-first slice.
             *        Maybe, we should parse all undisposable non-IDR slice of this
             *        picture until encountering MMCO_RESET in a slice of it. */
            if (h->nal_ref_idc && h->nal_unit_type != NAL_IDR_SLICE) {
                got_reset = scan_mmco_reset(s);
                if (got_reset < 0)
                    goto fail;
            }

            /* Set up the prev_ values for decoding POC of the next picture. */
            h->prev_frame_num        = got_reset ? 0 : h->frame_num;
            h->prev_frame_num_offset = got_reset ? 0 : h->frame_num_offset;
            if (h->nal_ref_idc != 0) {
                if (!got_reset) {
                    h->prev_poc_msb = h->poc_msb;
                    h->prev_poc_lsb = h->poc_lsb;
                } else {
                    h->prev_poc_msb = 0;
                    h->prev_poc_lsb =
                        h->picture_structure == PICT_BOTTOM_FIELD ? 0 : field_poc[0];
                }
            }

            /* repeat_pict comes from the picture timing SEI when the SPS
             * announces it, otherwise from the picture structure alone */
            if (h->sps.pic_struct_present_flag) {
                switch (h->sei_pic_struct) {
                case SEI_PIC_STRUCT_TOP_FIELD:
                case SEI_PIC_STRUCT_BOTTOM_FIELD:
                    s->repeat_pict = 0;
                    break;
                case SEI_PIC_STRUCT_FRAME:
                case SEI_PIC_STRUCT_TOP_BOTTOM:
                case SEI_PIC_STRUCT_BOTTOM_TOP:
                    s->repeat_pict = 1;
                    break;
                case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                    s->repeat_pict = 2;
                    break;
                case SEI_PIC_STRUCT_FRAME_DOUBLING:
                    s->repeat_pict = 3;
                    break;
                case SEI_PIC_STRUCT_FRAME_TRIPLING:
                    s->repeat_pict = 5;
                    break;
                default:
                    s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
                    break;
                }
            } else {
                s->repeat_pict = h->picture_structure == PICT_FRAME ? 1 : 0;
            }

            /* export picture structure and field order */
            if (h->picture_structure == PICT_FRAME) {
                s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
                if (h->sps.pic_struct_present_flag) {
                    switch (h->sei_pic_struct) {
                    case SEI_PIC_STRUCT_TOP_BOTTOM:
                    case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
                        s->field_order = AV_FIELD_TT;
                        break;
                    case SEI_PIC_STRUCT_BOTTOM_TOP:
                    case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                        s->field_order = AV_FIELD_BB;
                        break;
                    default:
                        s->field_order = AV_FIELD_PROGRESSIVE;
                        break;
                    }
                } else {
                    /* no SEI information: derive the order from the two field POCs */
                    if (field_poc[0] < field_poc[1])
                        s->field_order = AV_FIELD_TT;
                    else if (field_poc[0] > field_poc[1])
                        s->field_order = AV_FIELD_BB;
                    else
                        s->field_order = AV_FIELD_PROGRESSIVE;
                }
            } else {
                if (h->picture_structure == PICT_TOP_FIELD)
                    s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
                else
                    s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
                s->field_order = AV_FIELD_UNKNOWN;
            }

            av_freep(&nal.rbsp_buffer);
            return 0; /* no need to evaluate the rest */
        }
        /* not a slice: move on to the next NAL unit */
        buf += consumed;
    }
    /* didn't find a picture! */
    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
fail:
    av_freep(&nal.rbsp_buffer);
    return -1;
}
 469
 470 static int h264_parse(AVCodecParserContext *s,
 471                       AVCodecContext *avctx,
 472                       const uint8_t **poutbuf, int *poutbuf_size,
 473                       const uint8_t *buf, int buf_size)
 474 {
 475     H264ParseContext *p = s->priv_data;
 476     H264Context      *h = &p->h;
 477     ParseContext *pc = &p->pc;
 478     int next;
 479
 480     if (!p->got_first) {
 481         p->got_first = 1;
 482         if (avctx->extradata_size) {
 483             h->avctx = avctx;
 484             // must be done like in the decoder.
 485             // otherwise opening the parser, creating extradata,
 486             // and then closing and opening again
 487             // will cause has_b_frames to be always set.
 488             // NB: estimate_timings_from_pts behaves exactly like this.
 489             if (!avctx->has_b_frames)
 490                 h->low_delay = 1;
 491             ff_h264_decode_extradata(h);
 492         }
 493     }
 494
 495     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
 496         next = buf_size;
 497     } else {
 498         next = h264_find_frame_end(p, buf, buf_size);
 499
 500         if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
 501             *poutbuf      = NULL;
 502             *poutbuf_size = 0;
 503             return buf_size;
 504         }
 505
 506         if (next < 0 && next != END_NOT_FOUND) {
 507             assert(pc->last_index + next >= 0);
 508             h264_find_frame_end(p, &pc->buffer[pc->last_index + next], -next); // update state
 509         }
 510     }
 511
 512     parse_nal_units(s, avctx, buf, buf_size);
 513
 514     if (h->sei_cpb_removal_delay >= 0) {
 515         s->dts_sync_point    = h->sei_buffering_period_present;
 516         s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
 517         s->pts_dts_delta     = h->sei_dpb_output_delay;
 518     } else {
 519         s->dts_sync_point    = INT_MIN;
 520         s->dts_ref_dts_delta = INT_MIN;
 521         s->pts_dts_delta     = INT_MIN;
 522     }
 523
 524     if (s->flags & PARSER_FLAG_ONCE) {
 525         s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
 526     }
 527
 528     *poutbuf      = buf;
 529     *poutbuf_size = buf_size;
 530     return next;
 531 }
 532
 533 static int h264_split(AVCodecContext *avctx,
 534                       const uint8_t *buf, int buf_size)
 535 {
 536     int i;
 537     uint32_t state = -1;
 538     int has_sps    = 0;
 539
 540     for (i = 0; i <= buf_size; i++) {
 541         if ((state & 0xFFFFFF1F) == 0x107)
 542             has_sps = 1;
 543         /*  if((state&0xFFFFFF1F) == 0x101 ||
 544          *     (state&0xFFFFFF1F) == 0x102 ||
 545          *     (state&0xFFFFFF1F) == 0x105) {
 546          *  }
 547          */
 548         if ((state & 0xFFFFFF00) == 0x100 && (state & 0xFFFFFF1F) != 0x106 &&
 549             (state & 0xFFFFFF1F) != 0x107 && (state & 0xFFFFFF1F) != 0x108 &&
 550             (state & 0xFFFFFF1F) != 0x109 && (state & 0xFFFFFF1F) != 0x10d &&
 551             (state & 0xFFFFFF1F) != 0x10f) {
 552             if (has_sps) {
 553                 while (i > 4 && buf[i - 5] == 0)
 554                     i--;
 555                 return i - 4;
 556             }
 557         }
 558         if (i < buf_size)
 559             state = (state << 8) | buf[i];
 560     }
 561     return 0;
 562 }
 563
 564 static void h264_close(AVCodecParserContext *s)
 565 {
 566     H264ParseContext *p = s->priv_data;
 567     H264Context      *h = &p->h;
 568     ParseContext *pc = &p->pc;
 569
 570     av_free(pc->buffer);
 571     ff_h264_free_context(h);
 572 }
 573
 574 static av_cold int init(AVCodecParserContext *s)
 575 {
 576     H264ParseContext *p = s->priv_data;
 577     H264Context      *h = &p->h;
 578
 579     h->slice_ctx = av_mallocz(sizeof(*h->slice_ctx));
 580     if (!h->slice_ctx)
 581         return 0;
 582     h->nb_slice_ctx = 1;
 583
 584     h->slice_context_count = 1;
 585     ff_h264dsp_init(&h->h264dsp, 8, 1);
 586     return 0;
 587 }
 588
/**
 * Parser descriptor for H.264: binds AV_CODEC_ID_H264 to the init, parse,
 * close and split callbacks defined in this file.
 */
AVCodecParser ff_h264_parser = {
    .codec_ids      = { AV_CODEC_ID_H264 },
    .priv_data_size = sizeof(H264ParseContext),   /* size of the private context allocated per parser */
    .parser_init    = init,
    .parser_parse   = h264_parse,
    .parser_close   = h264_close,
    .split          = h264_split,
};