2 * 3GPP TS 26.245 Timed Text encoder
3 * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/opt.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/avstring.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/mem.h"
29 #include "libavutil/common.h"
30 #include "ass_split.h"
32 #include "bytestream.h"
/* Face-style bits that are OR-ed together to form a style record's style_flag. */
34 #define STYLE_FLAG_BOLD (1<<0)
35 #define STYLE_FLAG_ITALIC (1<<1)
36 #define STYLE_FLAG_UNDERLINE (1<<2)
/* Bytes per style record as emitted by encode_styl(): 2 + 2 + 2 + 1 + 1 + 4. */
37 #define STYLE_RECORD_SIZE 12
/* Bits of box_flags; each encode_*() helper writes its box only when its bit is set. */
40 #define STYL_BOX (1<<0)
41 #define HLIT_BOX (1<<1)
42 #define HCLR_BOX (1<<2)
/* Values assigned to the default style s->d in encode_sample_description(). */
44 #define DEFAULT_STYLE_FONT_ID 0x01
45 #define DEFAULT_STYLE_FONTSIZE 0x12
46 #define DEFAULT_STYLE_COLOR 0xffffffff
47 #define DEFAULT_STYLE_FLAG 0x00
/* Exchange bits 0-7 with bits 16-23 of c and keep bits 8-15; bits 24-31 are discarded. */
49 #define BGR_TO_RGB(c) (((c) & 0xff) << 16 | ((c) & 0xff00) | (((uint32_t)(c) >> 16) & 0xff))
/* Font size fs multiplied by s->font_scale_factor, plus 0.5. The result is a
 * double, so the truncating integer conversion at the use sites rounds
 * non-negative values to the nearest integer. */
50 #define FONTSIZE_SCALE(s,fs) ((fs) * (s)->font_scale_factor + 0.5)
/* av_bprint_append_data() with the data pointer cast to const char *. */
51 #define av_bprint_append_any(buf, data, size) av_bprint_append_data(buf, ((const char*)data), size)
/* Members read through StyleBox pointers in encode_styl(). */
/* Font identifier; serialised as a big-endian 16-bit field. */
57 uint16_t style_fontID;
/* Font size; serialised as a single byte. */
58 uint8_t style_fontsize;
/* Members accessed through MovTextContext pointers throughout this file. */
73 AVCodecContext *avctx;
/* Parsed ASS header: created by ff_ass_split() in init, released by
 * ff_ass_split_free() in close. */
75 ASSSplitContext *ass_ctx;
/* Style of the dialog being processed (set in mov_text_dialog()); used by
 * mov_text_cancel_overrides_cb() when no style name is given. */
76 ASSStyle *ass_dialog_style;
/* Array of finished style records, grown with av_fast_realloc(). */
77 StyleBox *style_attributes;
/* Allocation size bookkeeping passed to av_fast_realloc(). */
79 unsigned style_attributes_bytes_allocated;
/* Style record currently being built; copied into style_attributes when closed. */
80 StyleBox style_attributes_temp;
/* frame_height / play_res_y when both are non-zero, otherwise 1
 * (see encode_sample_description()). */
90 double font_scale_factor;
/* Per-box encoder callback; mov_text_encode_frame() invokes it for every
 * box_types[] entry. */
95 void (*encode)(MovTextContext *s);
/*
 * Reset style-tracking state: the style record in progress goes back to the
 * default style stored in s->d.
 */
98 static void mov_text_cleanup(MovTextContext *s)
101 s->style_attributes_temp = s->d;
/*
 * Append a 'styl' box holding all collected style records to s->buffer.
 *
 * Nothing is written unless STYL_BOX is set in s->box_flags and s->count is
 * non-zero.
 */
104 static void encode_styl(MovTextContext *s)
106 if ((s->box_flags & STYL_BOX) && s->count) {
107 uint8_t buf[12], *p = buf;
/* Box header, 10 bytes: 32-bit size, 'styl' tag, 16-bit record count. */
109 bytestream_put_be32(&p, s->count * STYLE_RECORD_SIZE + SIZE_ADD);
110 bytestream_put_be32(&p, MKBETAG('s','t','y','l'));
111 bytestream_put_be16(&p, s->count);
112 /*The above three attributes are hard coded for now
113 but will come from ASS style in the future*/
114 av_bprint_append_any(&s->buffer, buf, 10);
115 for (unsigned j = 0; j < s->count; j++) {
116 const StyleBox *style = &s->style_attributes[j];
/* One 12-byte record: start, end and font ID as be16, flags and font size
 * as single bytes, colour as be32. */
119 bytestream_put_be16(&p, style->style_start);
120 bytestream_put_be16(&p, style->style_end);
121 bytestream_put_be16(&p, style->style_fontID);
122 bytestream_put_byte(&p, style->style_flag);
123 bytestream_put_byte(&p, style->style_fontsize);
124 bytestream_put_be32(&p, style->style_color);
126 av_bprint_append_any(&s->buffer, buf, 12);
/*
 * Append a 12-byte 'hlit' box to s->buffer when HLIT_BOX is set in
 * s->box_flags.
 */
132 static void encode_hlit(MovTextContext *s)
134 if (s->box_flags & HLIT_BOX) {
135 uint8_t buf[12], *p = buf;
/* 32-bit size (12), 'hlit' tag, then highlight start and end as be16. */
137 bytestream_put_be32(&p, 12);
138 bytestream_put_be32(&p, MKBETAG('h','l','i','t'));
139 bytestream_put_be16(&p, s->hlit.start);
140 bytestream_put_be16(&p, s->hlit.end);
142 av_bprint_append_any(&s->buffer, buf, 12);
/*
 * Append a 12-byte 'hclr' box to s->buffer when HCLR_BOX is set in
 * s->box_flags.
 */
146 static void encode_hclr(MovTextContext *s)
148 if (s->box_flags & HCLR_BOX) {
149 uint8_t buf[12], *p = buf;
/* 32-bit size (12), 'hclr' tag, then the highlight colour as be32. */
151 bytestream_put_be32(&p, 12);
152 bytestream_put_be32(&p, MKBETAG('h','c','l','r'));
153 bytestream_put_be32(&p, s->hclr.color);
155 av_bprint_append_any(&s->buffer, buf, 12);
/* Table of box encoders; mov_text_encode_frame() calls each entry's encode
 * callback in index order. */
159 static const Box box_types[] = {
/* Number of entries in box_types. */
165 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
/*
 * Encoder close callback (.close of ff_movtext_encoder).
 *
 * Releases the parsed ASS context, the style record array and the output
 * buffer held in the private context.
 */
167 static int mov_text_encode_close(AVCodecContext *avctx)
169 MovTextContext *s = avctx->priv_data;
171 ff_ass_split_free(s->ass_ctx);
172 av_freep(&s->style_attributes);
174 av_bprint_finalize(&s->buffer, NULL);
/*
 * Build the sample description from the parsed ASS header and store it in
 * avctx->extradata.
 *
 * The default style (s->d), the background colour and a font table are
 * derived from the ASS header, serialised into s->buffer, and then copied
 * into a newly allocated, zero-padded extradata buffer. s->buffer is cleared
 * afterwards.
 *
 * Returns AVERROR(ENOMEM) when s->buffer is incomplete or when the extradata
 * allocation fails.
 */
178 static int encode_sample_description(AVCodecContext *avctx)
183 uint32_t back_color = 0;
184 int font_names_total_len = 0;
185 MovTextContext *s = avctx->priv_data;
186 uint8_t buf[30], *p = buf;
/* Reference layout of the data produced below, shown with example values. */
188 // 0x00, 0x00, 0x00, 0x00, // uint32_t displayFlags
189 // 0x01, // int8_t horizontal-justification
190 // 0xFF, // int8_t vertical-justification
191 // 0x00, 0x00, 0x00, 0x00, // uint8_t background-color-rgba[4]
193 // 0x00, 0x00, // int16_t top
194 // 0x00, 0x00, // int16_t left
195 // 0x00, 0x00, // int16_t bottom
196 // 0x00, 0x00, // int16_t right
199 // 0x00, 0x00, // uint16_t startChar
200 // 0x00, 0x00, // uint16_t endChar
201 // 0x00, 0x01, // uint16_t font-ID
202 // 0x00, // uint8_t face-style-flags
203 // 0x12, // uint8_t font-size
204 // 0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4]
207 // 0x00, 0x00, 0x00, 0x12, // uint32_t size
208 // 'f', 't', 'a', 'b', // uint8_t name[4]
209 // 0x00, 0x01, // uint16_t entry-count
211 // 0x00, 0x01, // uint16_t font-ID
212 // 0x05, // uint8_t font-name-length
213 // 'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length]
217 // Populate sample description from ASS header
218 ass = (ASS*)s->ass_ctx;
219 // Compute font scaling factor based on (optionally) provided
220 // output video height and ASS script play_res_y
221 if (s->frame_height && ass->script_info.play_res_y)
222 s->font_scale_factor = (double)s->frame_height / ass->script_info.play_res_y;
224 s->font_scale_factor = 1;
/* Use the style named "Default"; if there is none, use the first style of
 * the header. */
226 style = ff_ass_style_get(s->ass_ctx, "Default");
227 if (!style && ass->styles_count) {
228 style = &ass->styles[0];
230 s->d.style_fontID = DEFAULT_STYLE_FONT_ID;
231 s->d.style_fontsize = DEFAULT_STYLE_FONTSIZE;
232 s->d.style_color = DEFAULT_STYLE_COLOR;
233 s->d.style_flag = DEFAULT_STYLE_FLAG;
235 s->d.style_fontsize = FONTSIZE_SCALE(s, style->font_size);
/* Colours: the low 24 bits are byte-swapped by BGR_TO_RGB and moved into the
 * upper three bytes; the top byte of the ASS value is inverted (255 - x) to
 * form the low (alpha) byte. */
236 s->d.style_color = BGR_TO_RGB(style->primary_color & 0xffffff) << 8 |
237 255 - ((uint32_t)style->primary_color >> 24);
238 s->d.style_flag = (!!style->bold * STYLE_FLAG_BOLD) |
239 (!!style->italic * STYLE_FLAG_ITALIC) |
240 (!!style->underline * STYLE_FLAG_UNDERLINE);
241 back_color = (BGR_TO_RGB(style->back_color & 0xffffff) << 8) |
242 (255 - ((uint32_t)style->back_color >> 24));
/* Fixed part of the description: 30 bytes in total. */
245 bytestream_put_be32(&p, 0); // displayFlags
246 bytestream_put_be16(&p, 0x01FF); // horizontal/vertical justification (2x int8_t)
247 bytestream_put_be32(&p, back_color);
248 bytestream_put_be64(&p, 0); // BoxRecord - 4xint16_t: top, left, bottom, right
250 bytestream_put_be16(&p, s->d.style_start);
251 bytestream_put_be16(&p, s->d.style_end);
252 bytestream_put_be16(&p, s->d.style_fontID);
253 bytestream_put_byte(&p, s->d.style_flag);
254 bytestream_put_byte(&p, s->d.style_fontsize);
255 bytestream_put_be32(&p, s->d.style_color);
257 av_bprint_append_any(&s->buffer, buf, 30);
260 // We can't build a complete font table since that would require
261 // scanning all dialogs first. But we can at least fill in what
262 // is available in the ASS header
263 if (style && ass->styles_count) {
264 // Find unique font names
/* The selected style's font goes first, so it becomes font ID 1 when the
 * table is written below (IDs are index + 1). */
265 if (style->font_name) {
266 av_dynarray_add(&s->fonts, &s->font_count, style->font_name);
267 font_names_total_len += strlen(style->font_name);
269 for (i = 0; i < ass->styles_count; i++) {
271 if (!ass->styles[i].font_name)
273 for (j = 0; j < s->font_count; j++) {
274 if (!strcmp(s->fonts[j], ass->styles[i].font_name)) {
280 av_dynarray_add(&s->fonts, &s->font_count,
281 ass->styles[i].font_name);
282 font_names_total_len += strlen(ass->styles[i].font_name);
286 av_dynarray_add(&s->fonts, &s->font_count, (char*)"Serif");
/* 'ftab' header, 10 bytes. The size field is SIZE_ADD plus, per font,
 * 3 bytes (be16 ID + length byte) and the name bytes. */
290 bytestream_put_be32(&p, SIZE_ADD + 3 * s->font_count + font_names_total_len); // Size
291 bytestream_put_be32(&p, MKBETAG('f','t','a','b'));
292 bytestream_put_be16(&p, s->font_count);
294 av_bprint_append_any(&s->buffer, buf, 10);
296 for (i = 0; i < s->font_count; i++) {
297 size_t len = strlen(s->fonts[i]);
/* NOTE(review): len is written as a single byte while all len name bytes
 * are appended; a name longer than 255 bytes would get a truncated length
 * field. Confirm whether such names can reach this point. */
300 bytestream_put_be16(&p, i + 1); //fontID
301 bytestream_put_byte(&p, len);
303 av_bprint_append_any(&s->buffer, buf, 3);
304 av_bprint_append_any(&s->buffer, s->fonts[i], len);
309 if (!av_bprint_is_complete(&s->buffer)) {
310 return AVERROR(ENOMEM);
/* Copy the serialised description into zero-initialised, padded extradata. */
313 avctx->extradata_size = s->buffer.len;
314 avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
315 if (!avctx->extradata) {
316 return AVERROR(ENOMEM);
319 memcpy(avctx->extradata, s->buffer.str, avctx->extradata_size);
320 av_bprint_clear(&s->buffer);
/*
 * Encoder init callback (.init of ff_movtext_encoder).
 *
 * Initialises s->buffer as an AVBPrint of unlimited size, parses
 * avctx->subtitle_header with ff_ass_split() and builds the sample
 * description through encode_sample_description().
 *
 * NOTE(review): AVERROR_INVALIDDATA is presumably the result when
 * ff_ass_split() fails, and mov_text_encode_close() presumably runs only on
 * the error path; confirm against the complete function.
 */
325 static av_cold int mov_text_encode_init(AVCodecContext *avctx)
328 MovTextContext *s = avctx->priv_data;
331 av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
333 s->ass_ctx = ff_ass_split(avctx->subtitle_header);
335 ret = AVERROR_INVALIDDATA;
338 ret = encode_sample_description(avctx);
345 mov_text_encode_close(avctx);
349 // Start a new style box if needed
/*
 * Closes the style record in progress and begins a new one at the current
 * text position. A record whose attributes differ from the defaults in s->d
 * is appended to s->style_attributes; one that equals the defaults is
 * dropped. Callers test the return value before modifying
 * s->style_attributes_temp.
 */
350 static int mov_text_style_start(MovTextContext *s)
352 // there's an existing style entry
353 if (s->style_attributes_temp.style_start == s->text_pos)
354 // Still at same text pos, use same entry
356 if (s->style_attributes_temp.style_flag != s->d.style_flag ||
357 s->style_attributes_temp.style_color != s->d.style_color ||
358 s->style_attributes_temp.style_fontID != s->d.style_fontID ||
359 s->style_attributes_temp.style_fontsize != s->d.style_fontsize) {
362 // last style != defaults, end the style entry and start a new one
/* Guard the size computation against overflow, then grow the array by one
 * record. On failure the output buffer is cleared and the STYL_BOX flag is
 * dropped. */
363 if (s->count + 1 > SIZE_MAX / sizeof(*s->style_attributes) ||
364 !(tmp = av_fast_realloc(s->style_attributes,
365 &s->style_attributes_bytes_allocated,
366 (s->count + 1) * sizeof(*s->style_attributes)))) {
368 av_bprint_clear(&s->buffer);
369 s->box_flags &= ~STYL_BOX;
/* Store the finished record (ending at text_pos) and restart from the
 * defaults at the same position. */
372 s->style_attributes = tmp;
373 s->style_attributes_temp.style_end = s->text_pos;
374 s->style_attributes[s->count++] = s->style_attributes_temp;
375 s->box_flags |= STYL_BOX;
376 s->style_attributes_temp = s->d;
377 s->style_attributes_temp.style_start = s->text_pos;
378 } else { // style entry matches defaults, drop entry
379 s->style_attributes_temp = s->d;
380 s->style_attributes_temp.style_start = s->text_pos;
/*
 * Translate a style code character into one of STYLE_FLAG_BOLD,
 * STYLE_FLAG_ITALIC or STYLE_FLAG_UNDERLINE. style_flag starts out as 0.
 */
385 static uint8_t mov_text_style_to_flag(const char style)
387 uint8_t style_flag = 0;
391 style_flag = STYLE_FLAG_BOLD;
394 style_flag = STYLE_FLAG_ITALIC;
397 style_flag = STYLE_FLAG_UNDERLINE;
/*
 * Turn on the given STYLE_FLAG_* bits in the style record in progress,
 * starting a new record first via mov_text_style_start().
 */
403 static void mov_text_style_set(MovTextContext *s, uint8_t style_flags)
/* True when every bit of style_flags is already present in the record. */
405 if (!((s->style_attributes_temp.style_flag & style_flags) ^ style_flags)) {
406 // setting flags that are already set
409 if (mov_text_style_start(s))
410 s->style_attributes_temp.style_flag |= style_flags;
/*
 * Style callback of mov_text_callbacks: switches one face-style flag on the
 * style record in progress. style selects the flag through
 * mov_text_style_to_flag(); close is compared against the flag's current
 * state.
 */
413 static void mov_text_style_cb(void *priv, const char style, int close)
415 MovTextContext *s = priv;
416 uint8_t style_flag = mov_text_style_to_flag(style);
418 if (!!(s->style_attributes_temp.style_flag & style_flag) != close) {
419 // flag is already set while close is 0, or already clear while close is 1
422 if (mov_text_style_start(s)) {
424 s->style_attributes_temp.style_flag |= style_flag;
426 s->style_attributes_temp.style_flag &= ~style_flag;
/*
 * Replace the upper three bytes (RGB) of the colour of the style record in
 * progress with those of color; the low (alpha) byte is preserved.
 */
430 static void mov_text_color_set(MovTextContext *s, uint32_t color)
432 if ((s->style_attributes_temp.style_color & 0xffffff00) == color) {
433 // color hasn't changed
436 if (mov_text_style_start(s))
437 s->style_attributes_temp.style_color = (color & 0xffffff00) |
438 (s->style_attributes_temp.style_color & 0xff);
/*
 * Colour callback of mov_text_callbacks.
 *
 * color_id 1 changes the colour of the style record in progress;
 * color_id 2 updates the highlight state (s->hlit, s->hclr) and sets the
 * HLIT_BOX and HCLR_BOX flags.
 */
441 static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color_id)
443 MovTextContext *s = priv;
/* Byte-swap to RGB and move into the upper three bytes; the low (alpha)
 * byte is left at zero. */
445 color = BGR_TO_RGB(color) << 8;
446 if (color_id == 1) { //primary color changes
447 mov_text_color_set(s, color);
448 } else if (color_id == 2) { //secondary color changes
449 if (!(s->box_flags & HCLR_BOX))
450 // Highlight alpha not set yet, use current primary alpha
451 s->hclr.color = s->style_attributes_temp.style_color;
/* No highlight recorded yet, or it started at this very position:
 * (re)start it here, keeping the alpha byte of hclr.color. */
452 if (!(s->box_flags & HLIT_BOX) || s->hlit.start == s->text_pos) {
453 s->box_flags |= HCLR_BOX;
454 s->box_flags |= HLIT_BOX;
455 s->hlit.start = s->text_pos;
456 s->hclr.color = color | (s->hclr.color & 0xFF);
459 s->hlit.end = s->text_pos;
460 /* If there are more than one secondary color changes in ASS,
461 take start of first section and end of last section. Movtext
462 allows only one highlight box per sample.
465 // Movtext does not support changes to other color_id (outline, background)
/*
 * Replace the low (alpha) byte of the colour of the style record in
 * progress; the upper three bytes are preserved.
 */
468 static void mov_text_alpha_set(MovTextContext *s, uint8_t alpha)
470 if ((s->style_attributes_temp.style_color & 0xff) == alpha) {
471 // alpha hasn't changed
474 if (mov_text_style_start(s))
475 s->style_attributes_temp.style_color =
476 (s->style_attributes_temp.style_color & 0xffffff00) | alpha;
/*
 * Alpha callback of mov_text_callbacks.
 *
 * alpha_id 1 changes the alpha of the style record in progress;
 * alpha_id 2 updates the highlight state (s->hlit, s->hclr) and sets the
 * HLIT_BOX and HCLR_BOX flags.
 */
479 static void mov_text_alpha_cb(void *priv, int alpha, int alpha_id)
481 MovTextContext *s = priv;
484 if (alpha_id == 1) // primary alpha changes
485 mov_text_alpha_set(s, alpha);
486 else if (alpha_id == 2) { //secondary alpha changes
487 if (!(s->box_flags & HCLR_BOX))
488 // Highlight color not set yet, use current primary color
489 s->hclr.color = s->style_attributes_temp.style_color;
/* No highlight recorded yet, or it started at this very position:
 * (re)start it here, keeping the upper three bytes of hclr.color. */
490 if (!(s->box_flags & HLIT_BOX) || s->hlit.start == s->text_pos) {
491 s->box_flags |= HCLR_BOX;
492 s->box_flags |= HLIT_BOX;
493 s->hlit.start = s->text_pos;
494 s->hclr.color = (s->hclr.color & 0xffffff00) | alpha;
497 s->hlit.end = s->text_pos;
499 // Movtext does not support changes to other alpha_id (outline, background)
/*
 * Search s->fonts for an entry that compares equal to name (strcmp) and
 * yield a font ID for it. name is passed straight to strcmp().
 */
502 static uint16_t find_font_id(MovTextContext *s, const char *name)
505 for (i = 0; i < s->font_count; i++) {
506 if (!strcmp(name, s->fonts[i]))
/*
 * Switch the style record in progress to the font called name, using the ID
 * obtained from find_font_id().
 */
512 static void mov_text_font_name_set(MovTextContext *s, const char *name)
514 int fontID = find_font_id(s, name);
515 if (s->style_attributes_temp.style_fontID == fontID) {
516 // font hasn't changed
519 if (mov_text_style_start(s))
520 s->style_attributes_temp.style_fontID = fontID;
/* Font-name callback of mov_text_callbacks; forwards to mov_text_font_name_set(). */
523 static void mov_text_font_name_cb(void *priv, const char *name)
525 mov_text_font_name_set((MovTextContext*)priv, name);
/*
 * Set the font size of the style record in progress. size is first scaled
 * with FONTSIZE_SCALE (s->font_scale_factor) and then stored in the 8-bit
 * style_fontsize field.
 */
528 static void mov_text_font_size_set(MovTextContext *s, int size)
530 size = FONTSIZE_SCALE(s, size);
531 if (s->style_attributes_temp.style_fontsize == size) {
532 // font size hasn't changed
535 if (mov_text_style_start(s))
536 s->style_attributes_temp.style_fontsize = size;
/* Font-size callback of mov_text_callbacks; forwards to mov_text_font_size_set(). */
539 static void mov_text_font_size_cb(void *priv, int size)
541 mov_text_font_size_set((MovTextContext*)priv, size);
/* End callback of mov_text_callbacks. */
544 static void mov_text_end_cb(void *priv)
546 // End of text, close any open style record
547 mov_text_style_start((MovTextContext*)priv);
/*
 * Apply the attributes of an ASS style (bold/italic/underline, primary
 * colour and alpha, font size, font name) to the style record in progress
 * through the mov_text_*_set() helpers.
 */
550 static void mov_text_ass_style_set(MovTextContext *s, ASSStyle *style)
552 uint8_t style_flags, alpha;
556 style_flags = (!!style->bold * STYLE_FLAG_BOLD) |
557 (!!style->italic * STYLE_FLAG_ITALIC) |
558 (!!style->underline * STYLE_FLAG_UNDERLINE);
559 mov_text_style_set(s, style_flags);
/* Low 24 bits byte-swapped to RGB in the upper three bytes; the top byte is
 * inverted (255 - x) to give the alpha value. */
560 color = BGR_TO_RGB(style->primary_color & 0xffffff) << 8;
561 mov_text_color_set(s, color);
562 alpha = 255 - ((uint32_t)style->primary_color >> 24);
563 mov_text_alpha_set(s, alpha);
564 mov_text_font_size_set(s, style->font_size);
/* NOTE(review): font_name is NULL-checked in encode_sample_description()
 * but reaches strcmp() in find_font_id() unchecked from here; confirm it
 * cannot be NULL at this point. */
565 mov_text_font_name_set(s, style->font_name);
567 // End current style record, go back to defaults
568 mov_text_style_start(s);
/*
 * Begin processing a dialog: look up the style named by dialog->style,
 * remember it in s->ass_dialog_style and apply it via
 * mov_text_ass_style_set().
 */
572 static void mov_text_dialog(MovTextContext *s, ASSDialog *dialog)
574 ASSStyle *style = ff_ass_style_get(s->ass_ctx, dialog->style);
576 s->ass_dialog_style = style;
577 mov_text_ass_style_set(s, style);
/*
 * Cancel-overrides callback of mov_text_callbacks: re-applies a whole style.
 * With a NULL or empty style_name the style of the current dialog is used;
 * otherwise the named style is looked up with ff_ass_style_get().
 */
580 static void mov_text_cancel_overrides_cb(void *priv, const char *style_name)
582 MovTextContext *s = priv;
585 if (!style_name || !*style_name)
586 style = s->ass_dialog_style;
588 style= ff_ass_style_get(s->ass_ctx, style_name);
590 mov_text_ass_style_set(s, style);
/*
 * Length of text (len bytes) as used for text positions, determined by
 * classifying bytes by their UTF-8 lead-byte patterns. The caller,
 * mov_text_text_cb(), treats a result of 0 as "not UTF-8".
 */
593 static uint16_t utf8_strlen(const char *text, int len)
595 uint16_t i = 0, ret = 0;
/* 110xxxxx */
600 else if ((c & 0xE0) == 0xC0)
/* 1110xxxx */
602 else if ((c & 0xF0) == 0xE0)
/* 11110xxx */
604 else if ((c & 0xF8) == 0xF0)
/*
 * Text callback of mov_text_callbacks: appends len bytes of text to
 * s->buffer, advances s->text_pos by the utf8_strlen() result (or by len
 * when that result is 0) and advances s->byte_count by len.
 */
613 static void mov_text_text_cb(void *priv, const char *text, int len)
615 uint16_t utf8_len = utf8_strlen(text, len);
616 MovTextContext *s = priv;
617 av_bprint_append_data(&s->buffer, text, len);
618 // If it's not utf-8, just use the byte length
619 s->text_pos += utf8_len ? utf8_len : len;
620 s->byte_count += len;
/* New-line callback of mov_text_callbacks: appends a single line-feed byte
 * to s->buffer. */
623 static void mov_text_new_line_cb(void *priv, int forced)
625 MovTextContext *s = priv;
626 av_bprint_append_data(&s->buffer, "\n", 1);
/* Callback table handed to ff_ass_split_override_codes() in
 * mov_text_encode_frame(), with the MovTextContext as private pointer. */
631 static const ASSCodesCallbacks mov_text_callbacks = {
632 .text = mov_text_text_cb,
633 .new_line = mov_text_new_line_cb,
634 .style = mov_text_style_cb,
635 .color = mov_text_color_cb,
636 .alpha = mov_text_alpha_cb,
637 .font_name = mov_text_font_name_cb,
638 .font_size = mov_text_font_size_cb,
639 .cancel_overrides = mov_text_cancel_overrides_cb,
640 .end = mov_text_end_cb,
/*
 * Subtitle encode callback (.encode_sub of ff_movtext_encoder).
 *
 * Every rect of sub must be of type SUBTITLE_ASS. Each dialog is split,
 * its style applied, and its text run through the override-code parser with
 * mov_text_callbacks, which fills s->buffer. The box encoders in box_types
 * are then run, and a big-endian 16-bit byte count together with the
 * buffered data is written to buf.
 *
 * Error results visible here: AVERROR(EINVAL) for a non-ASS rect,
 * AVERROR(ENOMEM), and AVERROR_BUFFER_TOO_SMALL when the data does not fit
 * in bufsize. Otherwise length is the buffered size plus 2.
 */
643 static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
644 int bufsize, const AVSubtitle *sub)
646 MovTextContext *s = avctx->priv_data;
655 for (i = 0; i < sub->num_rects; i++) {
656 const char *ass = sub->rects[i]->ass;
658 if (sub->rects[i]->type != SUBTITLE_ASS) {
659 av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
660 return AVERROR(EINVAL);
/* With FF_API_ASS_TIMING, events starting with "Dialogue: " go through
 * ff_ass_split_dialog(), which can yield several dialogs. */
663 #if FF_API_ASS_TIMING
664 if (!strncmp(ass, "Dialogue: ", 10)) {
666 dialog = ff_ass_split_dialog(s->ass_ctx, ass, 0, &num);
667 for (; dialog && num--; dialog++) {
668 mov_text_dialog(s, dialog);
669 ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
/* Otherwise a single dialog is obtained from ff_ass_split_dialog2() and
 * released with ff_ass_free_dialog() after use. */
673 dialog = ff_ass_split_dialog2(s->ass_ctx, ass);
675 return AVERROR(ENOMEM);
676 mov_text_dialog(s, dialog);
677 ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
678 ff_ass_free_dialog(&dialog);
679 #if FF_API_ASS_TIMING
/* Let every box encoder append its box to s->buffer. */
683 for (j = 0; j < box_count; j++) {
684 box_types[j].encode(s);
/* The sample begins with the text byte count as a big-endian 16-bit value. */
688 AV_WB16(buf, s->byte_count);
691 if (!av_bprint_is_complete(&s->buffer)) {
692 length = AVERROR(ENOMEM);
696 if (!s->buffer.len) {
701 if (s->buffer.len > bufsize - 3) {
702 av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
703 length = AVERROR_BUFFER_TOO_SMALL;
707 memcpy(buf, s->buffer.str, s->buffer.len);
708 length = s->buffer.len + 2;
711 av_bprint_clear(&s->buffer);
/* Byte offset of member x inside MovTextContext, for the option table. */
715 #define OFFSET(x) offsetof(MovTextContext, x)
/* Flags shared by all options below. */
716 #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_SUBTITLE_PARAM
/* Private options. "height" is stored in frame_height (integer, default 0,
 * range 0..INT_MAX); encode_sample_description() uses it to compute the
 * font scale factor. */
717 static const AVOption options[] = {
718 { "height", "Frame height, usually video height", OFFSET(frame_height), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
/* AVClass referenced by ff_movtext_encoder.priv_class.
 * NOTE(review): the class_name string reads "enoder", which looks like a
 * typo for "encoder"; it is runtime text and is left unchanged here. */
722 static const AVClass mov_text_encoder_class = {
723 .class_name = "MOV text enoder",
724 .item_name = av_default_item_name,
726 .version = LIBAVUTIL_VERSION_INT,
/* Codec descriptor of the subtitle encoder for AV_CODEC_ID_MOV_TEXT. Wires
 * the init / encode_sub / close callbacks defined in this file and uses
 * MovTextContext as private data. */
729 AVCodec ff_movtext_encoder = {
731 .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
732 .type = AVMEDIA_TYPE_SUBTITLE,
733 .id = AV_CODEC_ID_MOV_TEXT,
734 .priv_data_size = sizeof(MovTextContext),
735 .priv_class = &mov_text_encoder_class,
736 .init = mov_text_encode_init,
737 .encode_sub = mov_text_encode_frame,
738 .close = mov_text_encode_close,