2 * 3GPP TS 26.245 Timed Text encoder
3 * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/opt.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/avstring.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/mem.h"
29 #include "libavutil/common.h"
30 #include "ass_split.h"
32 #include "bytestream.h"
/* Face-style bits; OR-ed together to form a StyleBox style_flag value. */
34 #define STYLE_FLAG_BOLD (1<<0)
35 #define STYLE_FLAG_ITALIC (1<<1)
36 #define STYLE_FLAG_UNDERLINE (1<<2)
/* Number of bytes encode_styl() appends for each style record. */
37 #define STYLE_RECORD_SIZE 12
/* Bits of MovTextContext box_flags: which optional boxes the encode_* writers should emit. */
40 #define STYL_BOX (1<<0)
41 #define HLIT_BOX (1<<1)
42 #define HCLR_BOX (1<<2)
/* Default style values used by encode_sample_description() when the ASS header supplies no usable style. */
44 #define DEFAULT_STYLE_FONT_ID 0x01
45 #define DEFAULT_STYLE_FONTSIZE 0x12
46 #define DEFAULT_STYLE_COLOR 0xffffffff
47 #define DEFAULT_STYLE_FLAG 0x00
/* Swap the lowest and third-lowest bytes of c (0xBBGGRR <-> 0xRRGGBB). The result has only
 * 24 significant bits; anything above bit 23 is dropped. Note that c is evaluated three times. */
49 #define BGR_TO_RGB(c) (((c) & 0xff) << 16 | ((c) & 0xff00) | (((uint32_t)(c) >> 16) & 0xff))
/* Scale font size fs by s->font_scale_factor; the + 0.5 makes the later conversion to an
 * integer type round to nearest for non-negative values. The expression itself is a double. */
50 #define FONTSIZE_SCALE(s,fs) ((fs) * (s)->font_scale_factor + 0.5)
/* Append size raw bytes from any pointer type by casting it to const char* for av_bprint_append_data(). */
51 #define av_bprint_append_any(buf, data, size) av_bprint_append_data(buf, ((const char*)data), size)
/* Members of StyleBox (the remainder of the declaration is not visible in this excerpt). */
// Font identifier; emitted as a big-endian 16-bit field of a style record.
57 uint16_t style_fontID;
// Font size; emitted as a single byte of a style record.
58 uint8_t style_fontsize;
/* Members of MovTextContext, the encoder's private data (not all members are visible in this excerpt). */
73 AVCodecContext *avctx;
// Parsed ASS header, obtained from ff_ass_split() in init and released in close.
75 ASSSplitContext *ass_ctx;
// Style of the dialog being encoded; used when an override reset names no style.
76 ASSStyle *ass_dialog_style;
// Array of completed style records, grown with av_fast_realloc() and freed in close.
77 StyleBox *style_attributes;
// Allocation size bookkeeping passed to av_fast_realloc() for style_attributes.
79 unsigned style_attributes_bytes_allocated;
// Style record currently under construction; copied into style_attributes when it ends.
80 StyleBox style_attributes_temp;
// Multiplier applied to ASS font sizes: frame_height / play_res_y when both are set, otherwise 1.
90 double font_scale_factor;
// Member of Box: writer callback, invoked as encode(s, type) for every entry of box_types.
96 void (*encode)(MovTextContext *s, uint32_t tsmb_type);
/*
 * Reset the style record under construction to the default style s->d.
 * Only this statement of the body is visible in this excerpt.
 */
99 static void mov_text_cleanup(MovTextContext *s)
102 s->style_attributes_temp = s->d;
/*
 * Append a style box to s->buffer when STYL_BOX is flagged and at least one
 * style record has been collected.
 *
 * Layout written here: a 10-byte header (big-endian 32-bit size, 32-bit box
 * type taken from tsmb_type, 16-bit record count) followed by one 12-byte
 * record per entry of s->style_attributes.
 */
105 static void encode_styl(MovTextContext *s, uint32_t tsmb_type)
107 if ((s->box_flags & STYL_BOX) && s->count) {
// Scratch space large enough for either the header or one record.
108 uint8_t buf[12], *p = buf;
// Box size = all records plus SIZE_ADD (defined outside this excerpt).
110 bytestream_put_be32(&p, s->count * STYLE_RECORD_SIZE + SIZE_ADD);
111 bytestream_put_be32(&p, tsmb_type);
112 bytestream_put_be16(&p, s->count);
113 /*The above three attributes are hard coded for now
114 but will come from ASS style in the future*/
115 av_bprint_append_any(&s->buffer, buf, 10);
116 for (unsigned j = 0; j < s->count; j++) {
117 const StyleBox *style = &s->style_attributes[j];
// Record fields: start, end, font ID (16 bit each), flags, font size (8 bit each), colour (32 bit).
// NOTE(review): p presumably gets rewound to buf before each record; that line is not visible here.
120 bytestream_put_be16(&p, style->style_start);
121 bytestream_put_be16(&p, style->style_end);
122 bytestream_put_be16(&p, style->style_fontID);
123 bytestream_put_byte(&p, style->style_flag);
124 bytestream_put_byte(&p, style->style_fontsize);
125 bytestream_put_be32(&p, style->style_color);
127 av_bprint_append_any(&s->buffer, buf, 12);
/*
 * Append a 12-byte highlight box to s->buffer when HLIT_BOX is flagged:
 * 32-bit size (12), 32-bit box type from tsmb_type, then the 16-bit start
 * and end positions stored in s->hlit, all big-endian.
 */
133 static void encode_hlit(MovTextContext *s, uint32_t tsmb_type)
135 if (s->box_flags & HLIT_BOX) {
136 uint8_t buf[12], *p = buf;
138 bytestream_put_be32(&p, 12);
139 bytestream_put_be32(&p, tsmb_type);
140 bytestream_put_be16(&p, s->hlit.start);
141 bytestream_put_be16(&p, s->hlit.end);
143 av_bprint_append_any(&s->buffer, buf, 12);
/*
 * Append a 12-byte highlight-colour box to s->buffer when HCLR_BOX is
 * flagged: 32-bit size (12), 32-bit box type from tsmb_type, then the
 * 32-bit colour stored in s->hclr, all big-endian.
 */
147 static void encode_hclr(MovTextContext *s, uint32_t tsmb_type)
149 if (s->box_flags & HCLR_BOX) {
150 uint8_t buf[12], *p = buf;
152 bytestream_put_be32(&p, 12);
153 bytestream_put_be32(&p, tsmb_type);
154 bytestream_put_be32(&p, s->hclr.color);
156 av_bprint_append_any(&s->buffer, buf, 12);
/*
 * Table pairing each optional box's four-character tag with the function
 * that writes it. mov_text_encode_frame() walks the table in this order and
 * passes the tag to the writer as tsmb_type.
 */
160 static const Box box_types[] = {
161 { MKBETAG('s','t','y','l'), encode_styl },
162 { MKBETAG('h','l','i','t'), encode_hlit },
163 { MKBETAG('h','c','l','r'), encode_hclr },
// Number of entries in box_types.
166 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
/*
 * Encoder close callback: releases the parsed ASS header, the style record
 * array and the output AVBPrint buffer. Also called from
 * mov_text_encode_init().
 */
168 static int mov_text_encode_close(AVCodecContext *avctx)
170 MovTextContext *s = avctx->priv_data;
172 ff_ass_split_free(s->ass_ctx);
173 av_freep(&s->style_attributes);
// No result string is requested (NULL), so the buffer is only finalized.
175 av_bprint_finalize(&s->buffer, NULL);
/*
 * Build the sample description from the ASS header and store it as
 * avctx->extradata.
 *
 * The description is assembled in s->buffer: a 30-byte fixed part (display
 * flags, justification, background colour, box record, default style
 * record) followed by a font table. The default style s->d and
 * s->font_scale_factor are initialised here as a side effect.
 *
 * Returns AVERROR(ENOMEM) if the buffer is incomplete or the extradata
 * allocation fails; the success return is not visible in this excerpt.
 */
179 static int encode_sample_description(AVCodecContext *avctx)
184 uint32_t back_color = 0;
185 int font_names_total_len = 0;
186 MovTextContext *s = avctx->priv_data;
// 30 = 4 (flags) + 2 (justification) + 4 (background) + 8 (box record) + 12 (style record).
187 uint8_t buf[30], *p = buf;
189 // 0x00, 0x00, 0x00, 0x00, // uint32_t displayFlags
190 // 0x01, // int8_t horizontal-justification
191 // 0xFF, // int8_t vertical-justification
192 // 0x00, 0x00, 0x00, 0x00, // uint8_t background-color-rgba[4]
194 // 0x00, 0x00, // int16_t top
195 // 0x00, 0x00, // int16_t left
196 // 0x00, 0x00, // int16_t bottom
197 // 0x00, 0x00, // int16_t right
200 // 0x00, 0x00, // uint16_t startChar
201 // 0x00, 0x00, // uint16_t endChar
202 // 0x00, 0x01, // uint16_t font-ID
203 // 0x00, // uint8_t face-style-flags
204 // 0x12, // uint8_t font-size
205 // 0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4]
208 // 0x00, 0x00, 0x00, 0x12, // uint32_t size
209 // 'f', 't', 'a', 'b', // uint8_t name[4]
210 // 0x00, 0x01, // uint16_t entry-count
212 // 0x00, 0x01, // uint16_t font-ID
213 // 0x05, // uint8_t font-name-length
214 // 'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length]
218 // Populate sample description from ASS header
// NOTE(review): this cast assumes the ASS data sits at the start of ASSSplitContext; confirm in ass_split.
219 ass = (ASS*)s->ass_ctx;
220 // Compute font scaling factor based on (optionally) provided
221 // output video height and ASS script play_res_y
222 if (s->frame_height && ass->script_info.play_res_y)
223 s->font_scale_factor = (double)s->frame_height / ass->script_info.play_res_y;
225 s->font_scale_factor = 1;
// Prefer the style named "Default"; otherwise fall back to the first style in the header.
227 style = ff_ass_style_get(s->ass_ctx, "Default");
228 if (!style && ass->styles_count) {
229 style = &ass->styles[0];
// Start from the built-in defaults...
231 s->d.style_fontID = DEFAULT_STYLE_FONT_ID;
232 s->d.style_fontsize = DEFAULT_STYLE_FONTSIZE;
233 s->d.style_color = DEFAULT_STYLE_COLOR;
234 s->d.style_flag = DEFAULT_STYLE_FLAG;
// ...then take size, colour and flags from the selected style (the guarding condition is not visible here).
236 s->d.style_fontsize = FONTSIZE_SCALE(s, style->font_size);
// Colour word: byte-swapped low 24 bits in the upper three bytes, low byte = 255 minus the top byte of the ASS value.
237 s->d.style_color = BGR_TO_RGB(style->primary_color & 0xffffff) << 8 |
238 255 - ((uint32_t)style->primary_color >> 24);
239 s->d.style_flag = (!!style->bold * STYLE_FLAG_BOLD) |
240 (!!style->italic * STYLE_FLAG_ITALIC) |
241 (!!style->underline * STYLE_FLAG_UNDERLINE);
242 back_color = (BGR_TO_RGB(style->back_color & 0xffffff) << 8) |
243 (255 - ((uint32_t)style->back_color >> 24));
246 bytestream_put_be32(&p, 0); // displayFlags
247 bytestream_put_be16(&p, 0x01FF); // horizontal/vertical justification (2x int8_t)
248 bytestream_put_be32(&p, back_color);
249 bytestream_put_be64(&p, 0); // BoxRecord - 4xint16_t: top, left, bottom, right
// Default style record, same field order as the per-sample style records.
251 bytestream_put_be16(&p, s->d.style_start);
252 bytestream_put_be16(&p, s->d.style_end);
253 bytestream_put_be16(&p, s->d.style_fontID);
254 bytestream_put_byte(&p, s->d.style_flag);
255 bytestream_put_byte(&p, s->d.style_fontsize);
256 bytestream_put_be32(&p, s->d.style_color);
258 av_bprint_append_any(&s->buffer, buf, 30);
261 // We can't build a complete font table since that would require
262 // scanning all dialogs first. But we can at least fill in what
263 // is available in the ASS header
264 if (style && ass->styles_count) {
265 // Find unique font names
// The selected style's font goes first; the name pointers are added as-is, the strings are not duplicated here.
266 if (style->font_name) {
267 av_dynarray_add(&s->fonts, &s->font_count, style->font_name);
268 font_names_total_len += strlen(style->font_name);
// Then every other style's font, skipping styles without a name and names already collected.
270 for (i = 0; i < ass->styles_count; i++) {
272 if (!ass->styles[i].font_name)
274 for (j = 0; j < s->font_count; j++) {
275 if (!strcmp(s->fonts[j], ass->styles[i].font_name)) {
281 av_dynarray_add(&s->fonts, &s->font_count,
282 ass->styles[i].font_name);
283 font_names_total_len += strlen(ass->styles[i].font_name);
// Presumably the fallback when no style is available; the branch keyword is not visible in this excerpt.
287 av_dynarray_add(&s->fonts, &s->font_count, (char*)"Serif");
// Font table header: size counts 3 bytes per entry (16-bit ID + 8-bit length) plus all name bytes.
291 bytestream_put_be32(&p, SIZE_ADD + 3 * s->font_count + font_names_total_len); // Size
292 bytestream_put_be32(&p, MKBETAG('f','t','a','b'));
293 bytestream_put_be16(&p, s->font_count);
295 av_bprint_append_any(&s->buffer, buf, 10);
297 for (i = 0; i < s->font_count; i++) {
298 size_t len = strlen(s->fonts[i]);
// NOTE(review): the length field is a single byte but the full len bytes of the name are appended
// below; a name of 256 bytes or more would make the table inconsistent. Confirm whether that can occur.
301 bytestream_put_be16(&p, i + 1); //fontID
302 bytestream_put_byte(&p, len);
304 av_bprint_append_any(&s->buffer, buf, 3);
305 av_bprint_append_any(&s->buffer, s->fonts[i], len);
310 if (!av_bprint_is_complete(&s->buffer)) {
311 return AVERROR(ENOMEM);
// Copy the assembled bytes into zero-initialised, padded extradata, then empty the buffer for per-frame use.
314 avctx->extradata_size = s->buffer.len;
315 avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
316 if (!avctx->extradata) {
317 return AVERROR(ENOMEM);
320 memcpy(avctx->extradata, s->buffer.str, avctx->extradata_size);
321 av_bprint_clear(&s->buffer);
/*
 * Encoder init callback: sets up the unlimited-size output buffer, parses
 * avctx->subtitle_header into s->ass_ctx and writes the sample description
 * into extradata. The control flow between these steps (conditions, labels,
 * returns) is not visible in this excerpt.
 */
326 static av_cold int mov_text_encode_init(AVCodecContext *avctx)
329 MovTextContext *s = avctx->priv_data;
332 av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
334 s->ass_ctx = ff_ass_split(avctx->subtitle_header);
// Presumably taken when ff_ass_split() fails; the guarding condition is not shown.
336 ret = AVERROR_INVALIDDATA;
339 ret = encode_sample_description(avctx);
// Presumably the error path: release everything acquired above.
346 mov_text_encode_close(avctx);
350 // Start a new style box if needed
/*
 * Prepares s->style_attributes_temp so a style change can take effect at the
 * current text position. If the record under construction already starts at
 * this position it is reused. Otherwise a record that differs from the
 * defaults s->d is closed at s->text_pos and appended to s->style_attributes,
 * and a fresh record starting here is begun from the defaults.
 *
 * Callers use the return value as a "may modify the record" condition; the
 * return statements themselves are not visible in this excerpt.
 */
351 static int mov_text_style_start(MovTextContext *s)
353 // there's an existing style entry
354 if (s->style_attributes_temp.style_start == s->text_pos)
355 // Still at same text pos, use same entry
357 if (s->style_attributes_temp.style_flag != s->d.style_flag ||
358 s->style_attributes_temp.style_color != s->d.style_color ||
359 s->style_attributes_temp.style_fontID != s->d.style_fontID ||
360 s->style_attributes_temp.style_fontsize != s->d.style_fontsize) {
363 // last style != defaults, end the style entry and start a new one
// Guard the size computation against overflow before growing the array.
364 if (s->count + 1 > SIZE_MAX / sizeof(*s->style_attributes) ||
365 !(tmp = av_fast_realloc(s->style_attributes,
366 &s->style_attributes_bytes_allocated,
367 (s->count + 1) * sizeof(*s->style_attributes)))) {
// Growth failed: drop the output collected so far and stop emitting a style box.
369 av_bprint_clear(&s->buffer);
370 s->box_flags &= ~STYL_BOX;
373 s->style_attributes = tmp;
// Close the current record at this position and store it.
374 s->style_attributes_temp.style_end = s->text_pos;
375 s->style_attributes[s->count++] = s->style_attributes_temp;
376 s->box_flags |= STYL_BOX;
// Begin the next record from the defaults, starting here.
377 s->style_attributes_temp = s->d;
378 s->style_attributes_temp.style_start = s->text_pos;
379 } else { // style entry matches defaults, drop entry
380 s->style_attributes_temp = s->d;
381 s->style_attributes_temp.style_start = s->text_pos;
/*
 * Translate a style character into the matching STYLE_FLAG_* bit. The result
 * starts at 0; the selection logic that maps characters to the three
 * assignments below is not visible in this excerpt.
 */
386 static uint8_t mov_text_style_to_flag(const char style)
388 uint8_t style_flag = 0;
392 style_flag = STYLE_FLAG_BOLD;
395 style_flag = STYLE_FLAG_ITALIC;
398 style_flag = STYLE_FLAG_UNDERLINE;
/*
 * Turn on the given face-style flags in the record under construction,
 * starting a new record first when mov_text_style_start() allows it.
 */
404 static void mov_text_style_set(MovTextContext *s, uint8_t style_flags)
// True when every bit of style_flags is already present in the current flags.
406 if (!((s->style_attributes_temp.style_flag & style_flags) ^ style_flags)) {
407 // setting flags that are already set
410 if (mov_text_style_start(s))
411 s->style_attributes_temp.style_flag |= style_flags;
/*
 * Override-code callback for a style tag: style selects the flag via
 * mov_text_style_to_flag(), close tells whether the tag ends the style.
 */
414 static void mov_text_style_cb(void *priv, const char style, int close)
416 MovTextContext *s = priv;
417 uint8_t style_flag = mov_text_style_to_flag(style);
// With close being 0 or 1, this holds when the flag is already in the requested state
// (set while opening, or clear while closing).
419 if (!!(s->style_attributes_temp.style_flag & style_flag) != close) {
420 // flag is already in the requested state
423 if (mov_text_style_start(s)) {
// One of the two updates below applies, presumably chosen by close; the branch lines are not visible here.
425 s->style_attributes_temp.style_flag |= style_flag;
427 s->style_attributes_temp.style_flag &= ~style_flag;
/*
 * Set the upper 24 colour bits of the record under construction to those of
 * color, keeping the record's low (alpha) byte.
 */
431 static void mov_text_color_set(MovTextContext *s, uint32_t color)
// Only the upper 24 bits of the current value take part in the comparison.
433 if ((s->style_attributes_temp.style_color & 0xffffff00) == color) {
434 // color hasn't changed
437 if (mov_text_style_start(s))
438 s->style_attributes_temp.style_color = (color & 0xffffff00) |
439 (s->style_attributes_temp.style_color & 0xff);
/*
 * Override-code callback for a colour change. color_id 1 updates the text
 * colour of the current style record; color_id 2 updates the highlight
 * colour and range. Block structure (closing braces) is not visible in this
 * excerpt.
 */
442 static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color_id)
444 MovTextContext *s = priv;
// Reorder the colour bytes and move them into the upper 24 bits; the low (alpha) byte is left zero.
446 color = BGR_TO_RGB(color) << 8;
447 if (color_id == 1) { //primary color changes
448 mov_text_color_set(s, color);
449 } else if (color_id == 2) { //secondary color changes
450 if (!(s->box_flags & HCLR_BOX))
451 // Highlight alpha not set yet, use current primary alpha
452 s->hclr.color = s->style_attributes_temp.style_color;
// Start the highlight here if none has started yet, or if it started at this same position.
453 if (!(s->box_flags & HLIT_BOX) || s->hlit.start == s->text_pos) {
454 s->box_flags |= HCLR_BOX;
455 s->box_flags |= HLIT_BOX;
456 s->hlit.start = s->text_pos;
// New colour in the upper 24 bits, existing highlight alpha kept in the low byte.
457 s->hclr.color = color | (s->hclr.color & 0xFF);
460 s->hlit.end = s->text_pos;
461 /* If there are more than one secondary color changes in ASS,
462 take start of first section and end of last section. Movtext
463 allows only one highlight box per sample.
466 // Movtext does not support changes to other color_id (outline, background)
/*
 * Set the low (alpha) byte of the colour in the record under construction,
 * keeping its upper 24 colour bits.
 */
469 static void mov_text_alpha_set(MovTextContext *s, uint8_t alpha)
471 if ((s->style_attributes_temp.style_color & 0xff) == alpha) {
472 // alpha hasn't changed
475 if (mov_text_style_start(s))
476 s->style_attributes_temp.style_color =
477 (s->style_attributes_temp.style_color & 0xffffff00) | alpha;
/*
 * Override-code callback for an alpha change. alpha_id 1 updates the alpha
 * of the current style record; alpha_id 2 updates the highlight alpha and
 * range. Some lines of the body (including closing braces) are not visible
 * in this excerpt.
 */
480 static void mov_text_alpha_cb(void *priv, int alpha, int alpha_id)
482 MovTextContext *s = priv;
485 if (alpha_id == 1) // primary alpha changes
486 mov_text_alpha_set(s, alpha);
487 else if (alpha_id == 2) { //secondary alpha changes
488 if (!(s->box_flags & HCLR_BOX))
489 // Highlight color not set yet, use current primary color
490 s->hclr.color = s->style_attributes_temp.style_color;
// Start the highlight here if none has started yet, or if it started at this same position.
491 if (!(s->box_flags & HLIT_BOX) || s->hlit.start == s->text_pos) {
492 s->box_flags |= HCLR_BOX;
493 s->box_flags |= HLIT_BOX;
494 s->hlit.start = s->text_pos;
// Existing highlight colour kept in the upper 24 bits, new alpha in the low byte.
495 s->hclr.color = (s->hclr.color & 0xffffff00) | alpha;
498 s->hlit.end = s->text_pos;
500 // Movtext does not support changes to other alpha_id (outline, background)
/*
 * Look name up in s->fonts by exact string comparison.
 * The return statements are not visible in this excerpt; presumably a match
 * yields the font ID used in the font table (index + 1). TODO confirm.
 * NOTE(review): name is passed to strcmp() without a NULL check.
 */
503 static uint16_t find_font_id(MovTextContext *s, const char *name)
506 for (i = 0; i < s->font_count; i++) {
507 if (!strcmp(name, s->fonts[i]))
/*
 * Switch the record under construction to the font called name, using the ID
 * returned by find_font_id().
 */
513 static void mov_text_font_name_set(MovTextContext *s, const char *name)
515 int fontID = find_font_id(s, name);
516 if (s->style_attributes_temp.style_fontID == fontID) {
517 // font hasn't changed
520 if (mov_text_style_start(s))
521 s->style_attributes_temp.style_fontID = fontID;
/* Override-code callback for a font name change; forwards to mov_text_font_name_set(). */
524 static void mov_text_font_name_cb(void *priv, const char *name)
526 mov_text_font_name_set((MovTextContext*)priv, name);
/*
 * Set the font size of the record under construction to the given ASS size
 * after scaling it by s->font_scale_factor.
 */
529 static void mov_text_font_size_set(MovTextContext *s, int size)
// The double result of FONTSIZE_SCALE is converted back to int here.
531 size = FONTSIZE_SCALE(s, size);
532 if (s->style_attributes_temp.style_fontsize == size) {
533 // font size hasn't changed
536 if (mov_text_style_start(s))
// NOTE(review): style_fontsize is 8 bits wide and size is not range-checked before this assignment.
537 s->style_attributes_temp.style_fontsize = size;
/* Override-code callback for a font size change; forwards to mov_text_font_size_set(). */
540 static void mov_text_font_size_cb(void *priv, int size)
542 mov_text_font_size_set((MovTextContext*)priv, size);
/* Override-code callback invoked at the end of the text; the return value of mov_text_style_start() is ignored. */
545 static void mov_text_end_cb(void *priv)
547 // End of text, close any open style record
548 mov_text_style_start((MovTextContext*)priv);
/*
 * Apply an ASS style to the record under construction: face flags, colour,
 * alpha, font size and font name, each through its *_set helper. The
 * condition separating this from the final "back to defaults" step is not
 * visible in this excerpt (presumably a check that style is non-NULL).
 */
551 static void mov_text_ass_style_set(MovTextContext *s, ASSStyle *style)
553 uint8_t style_flags, alpha;
557 style_flags = (!!style->bold * STYLE_FLAG_BOLD) |
558 (!!style->italic * STYLE_FLAG_ITALIC) |
559 (!!style->underline * STYLE_FLAG_UNDERLINE);
560 mov_text_style_set(s, style_flags);
// Byte-swapped low 24 bits of the ASS colour, moved into the upper three bytes.
561 color = BGR_TO_RGB(style->primary_color & 0xffffff) << 8;
562 mov_text_color_set(s, color);
// Alpha byte is 255 minus the top byte of the ASS colour value.
563 alpha = 255 - ((uint32_t)style->primary_color >> 24);
564 mov_text_alpha_set(s, alpha);
565 mov_text_font_size_set(s, style->font_size);
// NOTE(review): encode_sample_description() treats font_name as possibly NULL, while the
// lookup behind this call compares it with strcmp() unconditionally. Confirm it cannot be NULL here.
566 mov_text_font_name_set(s, style->font_name);
568 // End current style record, go back to defaults
569 mov_text_style_start(s);
/*
 * Begin processing a dialog: look up the style named by dialog->style,
 * remember it as the dialog's style and apply it.
 */
573 static void mov_text_dialog(MovTextContext *s, ASSDialog *dialog)
575 ASSStyle *style = ff_ass_style_get(s->ass_ctx, dialog->style);
577 s->ass_dialog_style = style;
578 mov_text_ass_style_set(s, style);
/*
 * Override-code callback that resets overrides: a NULL or empty style_name
 * selects the current dialog's style, any other name is looked up in the
 * ASS header. The chosen style is then applied.
 */
581 static void mov_text_cancel_overrides_cb(void *priv, const char *style_name)
583 MovTextContext *s = priv;
586 if (!style_name || !*style_name)
587 style = s->ass_dialog_style;
589 style= ff_ass_style_get(s->ass_ctx, style_name);
591 mov_text_ass_style_set(s, style);
/*
 * Count the characters in the len bytes at text, treated as UTF-8. Most of
 * the loop is not visible in this excerpt. The caller treats a result of 0
 * as "not UTF-8" and falls back to the byte length.
 */
594 static uint16_t utf8_strlen(const char *text, int len)
596 uint16_t i = 0, ret = 0;
// The three visible tests match bytes of the form 110xxxxx, 1110xxxx and 11110xxx,
// i.e. the lead bytes of two-, three- and four-byte sequences.
601 else if ((c & 0xE0) == 0xC0)
603 else if ((c & 0xF0) == 0xE0)
605 else if ((c & 0xF8) == 0xF0)
/*
 * Override-code callback for a run of plain text: appends the bytes to the
 * output buffer and advances both the character position used for style and
 * highlight ranges and the running byte count.
 */
614 static void mov_text_text_cb(void *priv, const char *text, int len)
616 uint16_t utf8_len = utf8_strlen(text, len);
617 MovTextContext *s = priv;
618 av_bprint_append_data(&s->buffer, text, len);
619 // If it's not utf-8, just use the byte length
620 s->text_pos += utf8_len ? utf8_len : len;
621 s->byte_count += len;
/*
 * Override-code callback for a line break: appends a single "\n" to the
 * output buffer. forced is not used in the lines visible here; any position
 * bookkeeping is not visible in this excerpt.
 */
624 static void mov_text_new_line_cb(void *priv, int forced)
626 MovTextContext *s = priv;
627 av_bprint_append_data(&s->buffer, "\n", 1);
/*
 * Callback table handed to ff_ass_split_override_codes() by
 * mov_text_encode_frame(); each member routes one kind of ASS event to the
 * handler that updates the encoder state.
 */
632 static const ASSCodesCallbacks mov_text_callbacks = {
633 .text = mov_text_text_cb,
634 .new_line = mov_text_new_line_cb,
635 .style = mov_text_style_cb,
636 .color = mov_text_color_cb,
637 .alpha = mov_text_alpha_cb,
638 .font_name = mov_text_font_name_cb,
639 .font_size = mov_text_font_size_cb,
640 .cancel_overrides = mov_text_cancel_overrides_cb,
641 .end = mov_text_end_cb,
/*
 * Encode one AVSubtitle into buf.
 *
 * Each rectangle must be of type SUBTITLE_ASS. Its dialog line is split, the
 * dialog style applied and the override codes run through
 * mov_text_callbacks, which accumulates text in s->buffer; the optional
 * boxes are then appended by the box_types writers. The output is a 16-bit
 * big-endian byte count followed by the buffer contents.
 *
 * Error values visible here: AVERROR(EINVAL) for a non-ASS rectangle,
 * AVERROR(ENOMEM) and AVERROR_BUFFER_TOO_SMALL. On success length is set to
 * the buffer length plus 2. Several control-flow lines (resets, gotos,
 * labels, the final return) are not visible in this excerpt.
 */
644 static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
645 int bufsize, const AVSubtitle *sub)
647 MovTextContext *s = avctx->priv_data;
656 for (i = 0; i < sub->num_rects; i++) {
657 const char *ass = sub->rects[i]->ass;
659 if (sub->rects[i]->type != SUBTITLE_ASS) {
660 av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
661 return AVERROR(EINVAL);
664 #if FF_API_ASS_TIMING
// Input that starts with "Dialogue: " is split with ff_ass_split_dialog(), which may yield several dialogs.
665 if (!strncmp(ass, "Dialogue: ", 10)) {
667 dialog = ff_ass_split_dialog(s->ass_ctx, ass, 0, &num);
668 for (; dialog && num--; dialog++) {
669 mov_text_dialog(s, dialog);
670 ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
// Other input is split with ff_ass_split_dialog2(), whose single result is freed after use.
674 dialog = ff_ass_split_dialog2(s->ass_ctx, ass);
// Presumably taken when the split returned NULL; the condition line is not shown.
676 return AVERROR(ENOMEM);
677 mov_text_dialog(s, dialog);
678 ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
679 ff_ass_free_dialog(&dialog);
680 #if FF_API_ASS_TIMING
// Let every box writer append its box if the corresponding flag was set while parsing.
684 for (j = 0; j < box_count; j++) {
685 box_types[j].encode(s, box_types[j].type);
// NOTE(review): these two bytes are written before the bufsize check further down;
// confirm callers always provide at least 2 bytes.
689 AV_WB16(buf, s->byte_count);
692 if (!av_bprint_is_complete(&s->buffer)) {
693 length = AVERROR(ENOMEM);
697 if (!s->buffer.len) {
// NOTE(review): if s->buffer.len is an unsigned type, a bufsize below 3 makes the right-hand
// side negative and the comparison wrap. Confirm the type and the minimum bufsize.
702 if (s->buffer.len > bufsize - 3) {
703 av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
704 length = AVERROR_BUFFER_TOO_SMALL;
708 memcpy(buf, s->buffer.str, s->buffer.len);
// Payload plus the 2-byte length prefix.
709 length = s->buffer.len + 2;
// Empty the buffer so the next frame starts clean.
712 av_bprint_clear(&s->buffer);
/* Byte offset of member x inside MovTextContext, for the option table. */
716 #define OFFSET(x) offsetof(MovTextContext, x)
/* Option flags: encoding parameter for subtitle codecs. The expansion is not parenthesized. */
717 #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_SUBTITLE_PARAM
/* Private encoder options. */
718 static const AVOption options[] = {
// "height": integer stored in frame_height, default 0, range 0..INT_MAX; when non-zero it is
// divided by the script's play_res_y to obtain the font scale factor.
719 { "height", "Frame height, usually video height", OFFSET(frame_height), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
/* AVClass describing the encoder's private context; referenced by ff_movtext_encoder.priv_class. */
723 static const AVClass mov_text_encoder_class = {
// NOTE(review): "enoder" looks like a typo for "encoder". It is a runtime string, so it is left unchanged here.
724 .class_name = "MOV text enoder",
725 .item_name = av_default_item_name,
727 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry for the subtitle encoder: ties the codec ID and
 * media type to the private context size and class and to the
 * init / encode_sub / close callbacks defined in this file.
 */
730 AVCodec ff_movtext_encoder = {
732 .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
733 .type = AVMEDIA_TYPE_SUBTITLE,
734 .id = AV_CODEC_ID_MOV_TEXT,
735 .priv_data_size = sizeof(MovTextContext),
736 .priv_class = &mov_text_encoder_class,
737 .init = mov_text_encode_init,
738 .encode_sub = mov_text_encode_frame,
739 .close = mov_text_encode_close,