2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "libavutil/avstring.h"
20 #include "libavutil/display.h"
21 #include "libavutil/common.h"
22 #include "libavutil/opt.h"
29 #include "h264_levels.h"
// Private state of the h264_metadata bitstream filter.
//
// The fields visible here are filled from user options (the option table
// maps option names onto them via offsetof) or hold payloads that the
// filter builds once and then inserts into access units.
// NOTE(review): several members referenced elsewhere in this file (common,
// aud, aud_nal, tick_rate, crop_*, level, rotate, flip, ...) are not visible
// in this excerpt.
42 typedef struct H264MetadataContext {
// Requested sample aspect ratio; only applied to the SPS when both num and
// den are non-zero.
50 AVRational sample_aspect_ratio;
// VUI overrides. Each is written to the SPS only when it is >= 0; the
// option table defaults these to -1.
52 int overscan_appropriate_flag;
55 int video_full_range_flag;
57 int transfer_characteristics;
58 int matrix_coefficients;
// Written to both the top-field and bottom-field chroma location fields.
60 int chroma_sample_loc_type;
63 int fixed_frame_rate_flag;
// Raw "UUID+string" option text; parsed into sei_user_data_payload in init.
70 const char *sei_user_data;
// User-data-unregistered SEI payload built from sei_user_data.
71 SEIRawUserDataUnregistered sei_user_data_payload;
// How to treat display orientation SEI; compared against the BSF_ELEMENT_*
// values (PASS / REMOVE / INSERT are the ones visible in this file).
75 int display_orientation;
// Display orientation payload filled in when inserting a new SEI message.
78 H264RawSEIDisplayOrientation display_orientation_payload;
81 } H264MetadataContext;
// Insert an access unit delimiter as the first unit of the access unit.
//
// The AUD's primary_pic_type is chosen by looking at the slice_type of every
// slice (H264_NAL_SLICE / H264_NAL_IDR_SLICE) in the access unit and keeping
// only the table entries that allow all of them.
//
// Returns AVERROR_INVALIDDATA when no table entry allows every slice type
// seen. A failure to insert the unit is logged.
// NOTE(review): the return statements after the insertion are not visible in
// this excerpt.
84 static int h264_metadata_insert_aud(AVBSFContext *bsf,
85 CodedBitstreamFragment *au)
87 H264MetadataContext *ctx = bsf->priv_data;
// One candidate bit per primary_pic_type_table entry; all start as possible.
88 int primary_pic_type_mask = 0xff;
// Entry j is a bitmask of the slice_type values permitted when
// primary_pic_type == j (bit n set means slice_type n is allowed).
91 static const int primary_pic_type_table[] = {
94 0x0e7, // 0, 1, 2, 5, 6, 7
98 0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
99 0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
102 for (i = 0; i < au->nb_units; i++) {
103 if (au->units[i].type == H264_NAL_SLICE ||
104 au->units[i].type == H264_NAL_IDR_SLICE) {
105 H264RawSlice *slice = au->units[i].content;
// Drop every candidate whose permitted set lacks this slice's type.
106 for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
107 if (!(primary_pic_type_table[j] &
108 (1 << slice->header.slice_type)))
109 primary_pic_type_mask &= ~(1 << j);
// Scan for a candidate that survived; j past the end of the table means
// none did.
113 for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
114 if (primary_pic_type_mask & (1 << j))
116 if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
117 av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
118 "invalid slice types?\n");
119 return AVERROR_INVALIDDATA;
// The AUD lives in the filter context and is passed to CBS by pointer.
122 ctx->aud_nal = (H264RawAUD) {
123 .nal_unit_header.nal_unit_type = H264_NAL_AUD,
124 .primary_pic_type = j,
// Position 0: the AUD goes in front of all existing units.
127 err = ff_cbs_insert_unit_content(au, 0, H264_NAL_AUD,
128 &ctx->aud_nal, NULL);
130 av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
// Apply the user-requested overrides to one sequence parameter set.
//
// Handles, in order: sample aspect ratio, overscan, video signal type and
// colour description, chroma sample location, timing info, frame cropping
// and level. Each override is applied only when its option was set.
//
// Returns AVERROR(EINVAL) when a crop value is not a multiple of the
// cropping unit for the stream's chroma format.
// NOTE(review): the success return and parts of the control flow (else
// branches, closing braces) are not visible in this excerpt.
137 static int h264_metadata_update_sps(AVBSFContext *bsf,
140 H264MetadataContext *ctx = bsf->priv_data;
142 int crop_unit_x, crop_unit_y;
144 if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
// The array index is the aspect_ratio_idc value that signals that ratio.
146 static const AVRational sar_idc[] = {
147 { 0, 0 }, // Unspecified (never written here).
148 { 1, 1 }, { 12, 11 }, { 10, 11 }, { 16, 11 },
149 { 40, 33 }, { 24, 11 }, { 20, 11 }, { 32, 11 },
150 { 80, 33 }, { 18, 11 }, { 15, 11 }, { 64, 33 },
151 { 160, 99 }, { 4, 3 }, { 3, 2 }, { 2, 1 },
// Reduce the ratio so that both terms are at most 65535.
155 av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
156 ctx->sample_aspect_ratio.den, 65535);
// Look for a predefined ratio, skipping index 0 (unspecified).
158 for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
159 if (num == sar_idc[i].num &&
160 den == sar_idc[i].den)
// No table match: use idc 255 with an explicit width and height.
163 if (i == FF_ARRAY_ELEMS(sar_idc)) {
164 sps->vui.aspect_ratio_idc = 255;
165 sps->vui.sar_width = num;
166 sps->vui.sar_height = den;
168 sps->vui.aspect_ratio_idc = i;
170 sps->vui.aspect_ratio_info_present_flag = 1;
// Copy a context option into the same-named VUI field, but only when the
// option is >= 0.
174 #define SET_VUI_FIELD(field) do { \
175 if (ctx->field >= 0) { \
176 sps->vui.field = ctx->field; \
181 if (ctx->overscan_appropriate_flag >= 0) {
182 SET_VUI_FIELD(overscan_appropriate_flag);
183 sps->vui.overscan_info_present_flag = 1;
// Any video signal type option being set turns on
// video_signal_type_present_flag.
186 if (ctx->video_format >= 0 ||
187 ctx->video_full_range_flag >= 0 ||
188 ctx->colour_primaries >= 0 ||
189 ctx->transfer_characteristics >= 0 ||
190 ctx->matrix_coefficients >= 0) {
192 SET_VUI_FIELD(video_format);
194 SET_VUI_FIELD(video_full_range_flag);
// The three colour description fields share one presence flag.
196 if (ctx->colour_primaries >= 0 ||
197 ctx->transfer_characteristics >= 0 ||
198 ctx->matrix_coefficients >= 0) {
200 SET_VUI_FIELD(colour_primaries);
201 SET_VUI_FIELD(transfer_characteristics);
202 SET_VUI_FIELD(matrix_coefficients);
204 sps->vui.colour_description_present_flag = 1;
206 sps->vui.video_signal_type_present_flag = 1;
// The single option value is written to both field parities.
209 if (ctx->chroma_sample_loc_type >= 0) {
210 sps->vui.chroma_sample_loc_type_top_field =
211 ctx->chroma_sample_loc_type;
212 sps->vui.chroma_sample_loc_type_bottom_field =
213 ctx->chroma_sample_loc_type;
214 sps->vui.chroma_loc_info_present_flag = 1;
218 if (ctx->tick_rate.num && ctx->tick_rate.den) {
221 av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
222 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
// tick_rate is num / den, i.e. time_scale / num_units_in_tick.
224 sps->vui.time_scale = num;
225 sps->vui.num_units_in_tick = den;
227 sps->vui.timing_info_present_flag = 1;
230 SET_VUI_FIELD(fixed_frame_rate_flag);
// Cropping offsets are stored in units that depend on the chroma format and
// on frame_mbs_only_flag; work out the units in x and y.
232 if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
234 crop_unit_y = 2 - sps->frame_mbs_only_flag;
236 crop_unit_x = 1 + (sps->chroma_format_idc < 3);
237 crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
238 (2 - sps->frame_mbs_only_flag);
// Apply one crop_<border> option: it must be an exact multiple of the unit,
// is stored divided by the unit, and turns on frame_cropping_flag.
240 #define CROP(border, unit) do { \
241 if (ctx->crop_ ## border >= 0) { \
242 if (ctx->crop_ ## border % unit != 0) { \
243 av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
244 "must be a multiple of %d.\n", #border, unit); \
245 return AVERROR(EINVAL); \
247 sps->frame_crop_ ## border ## _offset = \
248 ctx->crop_ ## border / unit; \
249 sps->frame_cropping_flag = 1; \
252 CROP(left, crop_unit_x);
253 CROP(right, crop_unit_x);
254 CROP(top, crop_unit_y);
255 CROP(bottom, crop_unit_y);
258 if (ctx->level != LEVEL_UNSET) {
// LEVEL_AUTO: derive the level from properties of the stream itself.
261 if (ctx->level == LEVEL_AUTO) {
262 const H264LevelDescriptor *desc;
264 int width, height, dpb_frames;
// Take the bit rate from the first NAL HRD entry if present, otherwise from
// the first VCL HRD entry.
267 if (sps->vui.nal_hrd_parameters_present_flag) {
268 bit_rate = (sps->vui.nal_hrd_parameters.bit_rate_value_minus1[0] + 1) *
269 (INT64_C(1) << (sps->vui.nal_hrd_parameters.bit_rate_scale + 6));
270 } else if (sps->vui.vcl_hrd_parameters_present_flag) {
271 bit_rate = (sps->vui.vcl_hrd_parameters.bit_rate_value_minus1[0] + 1) *
272 (INT64_C(1) << (sps->vui.vcl_hrd_parameters.bit_rate_scale + 6));
273 // Adjust for VCL vs. NAL limits.
274 bit_rate = bit_rate * 6 / 5;
279 // Don't use max_dec_frame_buffering if it is only inferred.
280 dpb_frames = sps->vui.bitstream_restriction_flag ?
281 sps->vui.max_dec_frame_buffering : H264_MAX_DPB_FRAMES;
// Dimensions in luma samples, computed from macroblock counts (16 per
// macroblock); cropping is not taken into account here.
283 width = 16 * (sps->pic_width_in_mbs_minus1 + 1);
284 height = 16 * (sps->pic_height_in_map_units_minus1 + 1) *
285 (2 - sps->frame_mbs_only_flag);
// NOTE(review): the division by 2 presumably reflects two ticks per frame;
// confirm against the H.264 timing semantics.
287 if (sps->vui.timing_info_present_flag)
288 framerate = sps->vui.time_scale / sps->vui.num_units_in_tick / 2;
292 desc = ff_h264_guess_level(sps->profile_idc, bit_rate, framerate,
293 width, height, dpb_frames);
295 level_idc = desc->level_idc;
297 av_log(bsf, AV_LOG_WARNING, "Stream does not appear to "
298 "conform to any level: using level 6.2.\n");
// An explicit level was requested: use it as given.
302 level_idc = ctx->level;
// NOTE(review): level_idc 9 is special-cased for profiles 66, 77 and 88 by
// setting constraint_set3_flag; presumably this is level 1b signalling.
// Confirm against H.264 Annex A.
305 if (level_idc == 9) {
306 if (sps->profile_idc == 66 ||
307 sps->profile_idc == 77 ||
308 sps->profile_idc == 88) {
310 sps->constraint_set3_flag = 1;
315 sps->level_idc = level_idc;
320 sps->vui_parameters_present_flag = 1;
// Handle display orientation SEI for one access unit.
//
// The visible code does three things:
//  - for each display orientation SEI message found, builds a display
//    matrix from it and attaches it to the packet as
//    AV_PKT_DATA_DISPLAYMATRIX side data;
//  - deletes all display orientation SEI messages when the mode is
//    BSF_ELEMENT_REMOVE or BSF_ELEMENT_INSERT;
//  - in BSF_ELEMENT_INSERT mode, fills a new payload from the packet's
//    display matrix side data and/or the rotate and flip options, then adds
//    it to the access unit.
//
// Returns AVERROR(ENOMEM) on allocation failure or when attaching the side
// data fails.
// NOTE(review): the condition guarding the extraction loop and several
// branch boundaries are not visible in this excerpt.
325 static int h264_metadata_handle_display_orientation(AVBSFContext *bsf,
327 CodedBitstreamFragment *au,
330 H264MetadataContext *ctx = bsf->priv_data;
331 SEIRawMessage *message;
335 while (ff_cbs_sei_find_message(ctx->common.output, au,
336 SEI_TYPE_DISPLAY_ORIENTATION,
338 H264RawSEIDisplayOrientation *disp = message->payload;
// 3x3 display matrix: nine int32_t entries.
341 matrix = av_malloc(9 * sizeof(int32_t));
343 return AVERROR(ENOMEM);
345 av_display_rotation_set(matrix,
346 disp->anticlockwise_rotation *
348 av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
350 // If there are multiple display orientation messages in an
351 // access unit, then the last one added to the packet (i.e.
352 // the first one in the access unit) will prevail.
353 err = av_packet_add_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX,
355 9 * sizeof(int32_t));
357 av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
358 "displaymatrix side data to packet.\n");
360 return AVERROR(ENOMEM);
// Both REMOVE and INSERT start by dropping any existing messages.
364 if (ctx->display_orientation == BSF_ELEMENT_REMOVE ||
365 ctx->display_orientation == BSF_ELEMENT_INSERT) {
366 ff_cbs_sei_delete_message_type(ctx->common.output, au,
367 SEI_TYPE_DISPLAY_ORIENTATION);
370 if (ctx->display_orientation == BSF_ELEMENT_INSERT) {
371 H264RawSEIDisplayOrientation *disp =
372 &ctx->display_orientation_payload;
// Use the packet's display matrix only if it holds a full 3x3 matrix.
377 data = av_packet_get_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX, &size);
378 if (data && size >= 9 * sizeof(int32_t)) {
382 double scale_x, scale_y, angle;
384 memcpy(matrix, data, sizeof(matrix));
// Convert the stored integers to floating point by dividing by 65536.
386 for (i = 0; i < 9; i++)
387 dmatrix[i] = matrix[i] / 65536.0;
389 // Extract scale factors.
390 scale_x = hypot(dmatrix[0], dmatrix[3]);
391 scale_y = hypot(dmatrix[1], dmatrix[4]);
393 // Select flips to make the main diagonal positive.
394 hflip = dmatrix[0] < 0.0;
395 vflip = dmatrix[4] < 0.0;
// Divide the first two columns by their scale factors.
402 for (i = 0; i < 9; i += 3) {
403 dmatrix[i] /= scale_x;
404 dmatrix[i + 1] /= scale_y;
408 angle = atan2(dmatrix[3], dmatrix[0]);
// Warn when the angle falls outside [-pi, pi] (also true for NaN) or when
// any of entries 2, 5, 6 or 7 of the raw matrix is non-zero.
410 if (!(angle >= -M_PI && angle <= M_PI) ||
411 matrix[2] != 0.0 || matrix[5] != 0.0 ||
412 matrix[6] != 0.0 || matrix[7] != 0.0) {
413 av_log(bsf, AV_LOG_WARNING, "Input display matrix is not "
414 "representable in H.264 parameters.\n");
416 disp->hor_flip = hflip;
417 disp->ver_flip = vflip;
// Negative angles are wrapped into [0, 2*pi) before scaling.
418 disp->anticlockwise_rotation =
419 (uint16_t)rint((angle >= 0.0 ? angle
420 : angle + 2 * M_PI) *
// A non-NaN rotate option sets the rotation; negative values are wrapped by
// adding 360 degrees.
427 if (!isnan(ctx->rotate)) {
428 disp->anticlockwise_rotation =
429 (uint16_t)rint((ctx->rotate >= 0.0 ? ctx->rotate
430 : ctx->rotate + 360.0) *
// NOTE(review): the condition guarding these two assignments is not visible.
435 disp->hor_flip = !!(ctx->flip & FLIP_HORIZONTAL);
436 disp->ver_flip = !!(ctx->flip & FLIP_VERTICAL);
442 disp->display_orientation_repetition_period = 1;
444 err = ff_cbs_sei_add_message(ctx->common.output, au, 1,
445 SEI_TYPE_DISPLAY_ORIENTATION,
448 av_log(bsf, AV_LOG_ERROR, "Failed to add display orientation "
449 "SEI message to access unit.\n");
// Per-access-unit entry point, registered as update_fragment in
// h264_metadata_type.
//
// Steps visible here, in order: remove or insert the AUD, rewrite every SPS,
// add the user data SEI at seek points, delete filler NAL units and filler
// payload SEI, handle display orientation, and mark that the first access
// unit has been processed.
// NOTE(review): pkt is checked for NULL before the display orientation step;
// the guard around the earlier pkt->flags access is not visible in this
// excerpt. Confirm it exists.
458 static int h264_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt,
459 CodedBitstreamFragment *au)
461 H264MetadataContext *ctx = bsf->priv_data;
462 int err, i, has_sps, seek_point;
464 // If an AUD is present, it must be the first NAL unit.
465 if (au->nb_units && au->units[0].type == H264_NAL_AUD) {
466 if (ctx->aud == BSF_ELEMENT_REMOVE)
467 ff_cbs_delete_unit(au, 0);
469 if (ctx->aud == BSF_ELEMENT_INSERT) {
470 err = h264_metadata_insert_aud(bsf, au);
// Apply the option overrides to every SPS in this access unit.
477 for (i = 0; i < au->nb_units; i++) {
478 if (au->units[i].type == H264_NAL_SPS) {
479 err = h264_metadata_update_sps(bsf, au->units[i].content);
487 // The current packet should be treated as a seek point for metadata
488 // insertion if any of:
489 // - It is the first packet in the stream.
490 // - It contains an SPS, indicating that a sequence might start here.
491 // - It is marked as containing a key frame.
492 seek_point = !ctx->done_first_au || has_sps ||
493 (pkt->flags & AV_PKT_FLAG_KEY);
// The user data SEI is only added at seek points, not to every access unit.
498 if (ctx->sei_user_data && seek_point) {
499 err = ff_cbs_sei_add_message(ctx->common.output, au, 1,
500 SEI_TYPE_USER_DATA_UNREGISTERED,
501 &ctx->sei_user_data_payload, NULL);
503 av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
504 "message to access unit.\n");
509 if (ctx->delete_filler) {
// Walk backwards so that deleting a unit leaves the indices still to be
// visited unchanged.
510 for (i = au->nb_units - 1; i >= 0; i--) {
511 if (au->units[i].type == H264_NAL_FILLER_DATA) {
512 ff_cbs_delete_unit(au, i);
// Filler can also be carried as an SEI payload; remove that as well.
517 ff_cbs_sei_delete_message_type(ctx->common.output, au,
518 SEI_TYPE_FILLER_PAYLOAD);
521 if (pkt && ctx->display_orientation != BSF_ELEMENT_PASS) {
522 err = h264_metadata_handle_display_orientation(bsf, pkt, au,
// Later access units no longer count as "first packet" for seek_point.
529 ctx->done_first_au = 1;
// Descriptor passed to ff_cbs_bsf_generic_init() by h264_metadata_init():
// names the codec, gives the fragment and unit names, and registers the
// per-fragment callback.
534 static const CBSBSFType h264_metadata_type = {
535 .codec_id = AV_CODEC_ID_H264,
536 .fragment_name = "access unit",
537 .unit_name = "NAL unit",
538 .update_fragment = &h264_metadata_update_fragment,
// Filter init callback.
//
// When the sei_user_data option is set, parses it as "UUID+string" into the
// user-data-unregistered SEI payload, then hands over to the generic CBS
// bitstream filter init.
//
// Returns AVERROR(EINVAL) if the option text is not a 32-hex-digit UUID
// followed by '+'; otherwise returns what ff_cbs_bsf_generic_init() returns.
541 static int h264_metadata_init(AVBSFContext *bsf)
543 H264MetadataContext *ctx = bsf->priv_data;
545 if (ctx->sei_user_data) {
546 SEIRawUserDataUnregistered *udu = &ctx->sei_user_data_payload;
549 // Parse UUID. It must be a hex string of length 32, possibly
550 // containing '-'s between hex digits (which we ignore).
// i indexes the option text (at most 64 characters are examined); j counts
// the hex digits consumed so far.
551 for (i = j = 0; j < 32 && i < 64 && ctx->sei_user_data[i]; i++) {
553 c = ctx->sei_user_data[i];
556 } else if (av_isxdigit(c)) {
// NOTE(review): the 'a'-based conversion assumes c is lowercase here; confirm
// that a lowercase conversion precedes this line (not visible in this
// excerpt).
558 v = (c <= '9' ? c - '0' : c - 'a' + 10);
// Two hex digits per byte: one branch stores the high nibble, the other ORs
// in the low nibble.
563 udu->uuid_iso_iec_11578[j / 2] |= v;
565 udu->uuid_iso_iec_11578[j / 2] = v << 4;
// After a complete UUID, a '+' separates it from the payload string.
568 if (j == 32 && ctx->sei_user_data[i] == '+') {
// The payload points into the option string itself (no copy is made) and
// its length includes the terminating NUL.
569 udu->data = (uint8_t*)ctx->sei_user_data + i + 1;
570 udu->data_length = strlen(udu->data) + 1;
572 av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
573 "must be \"UUID+string\".\n");
574 return AVERROR(EINVAL);
578 return ff_cbs_bsf_generic_init(bsf, &h264_metadata_type);
// Option table for the h264_metadata bitstream filter.
//
// OFFSET() maps an option onto a field of H264MetadataContext; FLAGS marks
// every option as a video bitstream-filter parameter. Integer VUI options
// default to -1, which the SPS update code treats as "leave unchanged".
581 #define OFFSET(x) offsetof(H264MetadataContext, x)
582 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
583 static const AVOption h264_metadata_options[] = {
584 BSF_ELEMENT_OPTIONS_PIR("aud", "Access Unit Delimiter NAL units",
// A ratio of 0 (the default) leaves the aspect ratio untouched.
587 { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
588 OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
589 { .dbl = 0.0 }, 0, 65535, FLAGS },
591 { "overscan_appropriate_flag", "Set VUI overscan appropriate flag",
592 OFFSET(overscan_appropriate_flag), AV_OPT_TYPE_INT,
593 { .i64 = -1 }, -1, 1, FLAGS },
595 { "video_format", "Set video format (table E-2)",
596 OFFSET(video_format), AV_OPT_TYPE_INT,
597 { .i64 = -1 }, -1, 7, FLAGS},
598 { "video_full_range_flag", "Set video full range flag",
599 OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
600 { .i64 = -1 }, -1, 1, FLAGS },
601 { "colour_primaries", "Set colour primaries (table E-3)",
602 OFFSET(colour_primaries), AV_OPT_TYPE_INT,
603 { .i64 = -1 }, -1, 255, FLAGS },
604 { "transfer_characteristics", "Set transfer characteristics (table E-4)",
605 OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
606 { .i64 = -1 }, -1, 255, FLAGS },
607 { "matrix_coefficients", "Set matrix coefficients (table E-5)",
608 OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
609 { .i64 = -1 }, -1, 255, FLAGS },
611 { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
612 OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
613 { .i64 = -1 }, -1, 6, FLAGS },
// The SPS update code stores the numerator in time_scale and the denominator
// in num_units_in_tick, so the rate is time_scale / num_units_in_tick.
615 { "tick_rate", "Set VUI tick rate (time_scale / num_units_in_tick)",
616 OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
617 { .dbl = 0.0 }, 0, UINT_MAX, FLAGS },
618 { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
619 OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
620 { .i64 = -1 }, -1, 1, FLAGS },
// Crop values must be a multiple of the stream's cropping unit; the SPS
// update code rejects anything else with EINVAL.
622 { "crop_left", "Set left border crop offset",
623 OFFSET(crop_left), AV_OPT_TYPE_INT,
624 { .i64 = -1 }, -1, H264_MAX_WIDTH, FLAGS },
625 { "crop_right", "Set right border crop offset",
626 OFFSET(crop_right), AV_OPT_TYPE_INT,
627 { .i64 = -1 }, -1, H264_MAX_WIDTH, FLAGS },
628 { "crop_top", "Set top border crop offset",
629 OFFSET(crop_top), AV_OPT_TYPE_INT,
630 { .i64 = -1 }, -1, H264_MAX_HEIGHT, FLAGS },
631 { "crop_bottom", "Set bottom border crop offset",
632 OFFSET(crop_bottom), AV_OPT_TYPE_INT,
633 { .i64 = -1 }, -1, H264_MAX_HEIGHT, FLAGS },
635 { "sei_user_data", "Insert SEI user data (UUID+string)",
636 OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
638 { "delete_filler", "Delete all filler (both NAL and SEI)",
639 OFFSET(delete_filler), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
641 BSF_ELEMENT_OPTIONS_PIRE("display_orientation",
642 "Display orientation SEI",
643 display_orientation, FLAGS),
// NAN (the default) means "no rotation requested".
645 { "rotate", "Set rotation in display orientation SEI (anticlockwise angle in degrees)",
646 OFFSET(rotate), AV_OPT_TYPE_DOUBLE,
647 { .dbl = NAN }, -360.0, +360.0, FLAGS },
648 { "flip", "Set flip in display orientation SEI",
649 OFFSET(flip), AV_OPT_TYPE_FLAGS,
650 { .i64 = 0 }, 0, FLIP_HORIZONTAL | FLIP_VERTICAL, FLAGS, "flip" },
651 { "horizontal", "Set hor_flip",
652 0, AV_OPT_TYPE_CONST,
653 { .i64 = FLIP_HORIZONTAL }, .flags = FLAGS, .unit = "flip" },
654 { "vertical", "Set ver_flip",
655 0, AV_OPT_TYPE_CONST,
656 { .i64 = FLIP_VERTICAL }, .flags = FLAGS, .unit = "flip" },
658 { "level", "Set level (table A-1)",
659 OFFSET(level), AV_OPT_TYPE_INT,
660 { .i64 = LEVEL_UNSET }, LEVEL_UNSET, 0xff, FLAGS, "level" },
661 { "auto", "Attempt to guess level from stream properties",
662 0, AV_OPT_TYPE_CONST,
663 { .i64 = LEVEL_AUTO }, .flags = FLAGS, .unit = "level" },
// Named constants for the "level" unit; each value is the level number
// times ten (for example "4.1" is 41).
664 #define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
665 { .i64 = value }, .flags = FLAGS, .unit = "level"
668 { LEVEL("1.1", 11) },
669 { LEVEL("1.2", 12) },
670 { LEVEL("1.3", 13) },
672 { LEVEL("2.1", 21) },
673 { LEVEL("2.2", 22) },
675 { LEVEL("3.1", 31) },
676 { LEVEL("3.2", 32) },
678 { LEVEL("4.1", 41) },
679 { LEVEL("4.2", 42) },
681 { LEVEL("5.1", 51) },
682 { LEVEL("5.2", 52) },
684 { LEVEL("6.1", 61) },
685 { LEVEL("6.2", 62) },
// AVClass for the filter's private context: ties the option table to the
// filter and names it for logging.
691 static const AVClass h264_metadata_class = {
692 .class_name = "h264_metadata_bsf",
693 .item_name = av_default_item_name,
694 .option = h264_metadata_options,
695 .version = LIBAVUTIL_VERSION_INT,
// Codec IDs this filter accepts: H.264 only, terminated by AV_CODEC_ID_NONE.
698 static const enum AVCodecID h264_metadata_codec_ids[] = {
699 AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
// Public filter definition. Init is specific to this filter; close and
// filter are the generic CBS bitstream filter implementations.
702 const AVBitStreamFilter ff_h264_metadata_bsf = {
703 .name = "h264_metadata",
704 .priv_data_size = sizeof(H264MetadataContext),
705 .priv_class = &h264_metadata_class,
706 .init = &h264_metadata_init,
707 .close = &ff_cbs_bsf_generic_close,
708 .filter = &ff_cbs_bsf_generic_filter,
709 .codec_ids = h264_metadata_codec_ids,