git.sesse.net Git - ffmpeg/blob - libavcodec/movtextdec.c

   1 /*
   2  * 3GPP TS 26.245 Timed Text decoder
   3  * Copyright (c) 2012  Philip Langdale <philipl@overt.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "avcodec.h"
  23 #include "ass.h"
  24 #include "libavutil/avstring.h"
  25 #include "libavutil/common.h"
  26 #include "libavutil/bprint.h"
  27 #include "libavutil/intreadwrite.h"
  28 #include "libavutil/mem.h"
  29
  30 #define STYLE_FLAG_BOLD         (1<<0)
  31 #define STYLE_FLAG_ITALIC       (1<<1)
  32 #define STYLE_FLAG_UNDERLINE    (1<<2)
  33
  34 #define BOX_SIZE_INITIAL    40
  35
  36 #define STYL_BOX   (1<<0)
  37 #define HLIT_BOX   (1<<1)
  38 #define HCLR_BOX   (1<<2)
  39 #define TWRP_BOX   (1<<3)
  40
  41 #define BOTTOM_LEFT     1
  42 #define BOTTOM_CENTER   2
  43 #define BOTTOM_RIGHT    3
  44 #define MIDDLE_LEFT     4
  45 #define MIDDLE_CENTER   5
  46 #define MIDDLE_RIGHT    6
  47 #define TOP_LEFT        7
  48 #define TOP_CENTER      8
  49 #define TOP_RIGHT       9
  50
  51 #define RGB_TO_BGR(c) (((c) & 0xff) << 16 | ((c) & 0xff00) | (((c) >> 16) & 0xff))
  52
  53 typedef struct {
  54     char *font;
  55     int fontsize;
  56     int color;
  57     int back_color;
  58     int bold;
  59     int italic;
  60     int underline;
  61     int alignment;
  62 } MovTextDefault;
  63
  64 typedef struct {
  65     uint16_t fontID;
  66     char *font;
  67 } FontRecord;
  68
  69 typedef struct {
  70     uint16_t style_start;
  71     uint16_t style_end;
  72     uint8_t style_flag;
  73     uint8_t fontsize;
  74     uint16_t style_fontID;
  75 } StyleBox;
  76
  77 typedef struct {
  78     uint16_t hlit_start;
  79     uint16_t hlit_end;
  80 } HighlightBox;
  81
  82 typedef struct {
  83    uint8_t hlit_color[4];
  84 } HilightcolorBox;
  85
  86 typedef struct {
  87     uint8_t wrap_flag;
  88 } TextWrapBox;
  89
  90 typedef struct {
  91     StyleBox **s;
  92     StyleBox *s_temp;
  93     HighlightBox h;
  94     HilightcolorBox c;
  95     FontRecord **ftab;
  96     FontRecord *ftab_temp;
  97     TextWrapBox w;
  98     MovTextDefault d;
  99     uint8_t box_flags;
 100     uint16_t style_entries, ftab_entries;
 101     uint64_t tracksize;
 102     int size_var;
 103     int count_s, count_f;
 104     int readorder;
 105 } MovTextContext;
 106
 107 typedef struct {
 108     uint32_t type;
 109     size_t base_size;
 110     int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
 111 } Box;
 112
 113 static void mov_text_cleanup(MovTextContext *m)
 114 {
 115     int i;
 116     if (m->box_flags & STYL_BOX) {
 117         for(i = 0; i < m->count_s; i++) {
 118             av_freep(&m->s[i]);
 119         }
 120         av_freep(&m->s);
 121         m->count_s = 0;
 122         m->style_entries = 0;
 123     }
 124 }
 125
 126 static void mov_text_cleanup_ftab(MovTextContext *m)
 127 {
 128     int i;
 129     if (m->ftab_temp)
 130         av_freep(&m->ftab_temp->font);
 131     av_freep(&m->ftab_temp);
 132     if (m->ftab) {
 133         for(i = 0; i < m->count_f; i++) {
 134             av_freep(&m->ftab[i]->font);
 135             av_freep(&m->ftab[i]);
 136         }
 137     }
 138     av_freep(&m->ftab);
 139 }
 140
 141 static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
 142 {
 143     uint8_t *tx3g_ptr = avctx->extradata;
 144     int i, box_size, font_length;
 145     int8_t v_align, h_align;
 146     int style_fontID;
 147     StyleBox s_default;
 148
 149     m->count_f = 0;
 150     m->ftab_entries = 0;
 151     box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
 152     if (avctx->extradata_size < box_size)
 153         return -1;
 154
 155     // Display Flags
 156     tx3g_ptr += 4;
 157     // Alignment
 158     h_align = *tx3g_ptr++;
 159     v_align = *tx3g_ptr++;
 160     if (h_align == 0) {
 161         if (v_align == 0)
 162             m->d.alignment = TOP_LEFT;
 163         if (v_align == 1)
 164             m->d.alignment = MIDDLE_LEFT;
 165         if (v_align == -1)
 166             m->d.alignment = BOTTOM_LEFT;
 167     }
 168     if (h_align == 1) {
 169         if (v_align == 0)
 170             m->d.alignment = TOP_CENTER;
 171         if (v_align == 1)
 172             m->d.alignment = MIDDLE_CENTER;
 173         if (v_align == -1)
 174             m->d.alignment = BOTTOM_CENTER;
 175     }
 176     if (h_align == -1) {
 177         if (v_align == 0)
 178             m->d.alignment = TOP_RIGHT;
 179         if (v_align == 1)
 180             m->d.alignment = MIDDLE_RIGHT;
 181         if (v_align == -1)
 182             m->d.alignment = BOTTOM_RIGHT;
 183     }
 184     // Background Color
 185     m->d.back_color = AV_RB24(tx3g_ptr);
 186     tx3g_ptr += 4;
 187     // BoxRecord
 188     tx3g_ptr += 8;
 189     // StyleRecord
 190     tx3g_ptr += 4;
 191     // fontID
 192     style_fontID = AV_RB16(tx3g_ptr);
 193     tx3g_ptr += 2;
 194     // face-style-flags
 195     s_default.style_flag = *tx3g_ptr++;
 196     m->d.bold = !!(s_default.style_flag & STYLE_FLAG_BOLD);
 197     m->d.italic = !!(s_default.style_flag & STYLE_FLAG_ITALIC);
 198     m->d.underline = !!(s_default.style_flag & STYLE_FLAG_UNDERLINE);
 199     // fontsize
 200     m->d.fontsize = *tx3g_ptr++;
 201     // Primary color
 202     m->d.color = AV_RB24(tx3g_ptr);
 203     tx3g_ptr += 4;
 204     // FontRecord
 205     // FontRecord Size
 206     tx3g_ptr += 4;
 207     // ftab
 208     tx3g_ptr += 4;
 209
 210     m->ftab_entries = AV_RB16(tx3g_ptr);
 211     tx3g_ptr += 2;
 212
 213     for (i = 0; i < m->ftab_entries; i++) {
 214
 215         box_size += 3;
 216         if (avctx->extradata_size < box_size) {
 217             mov_text_cleanup_ftab(m);
 218             m->ftab_entries = 0;
 219             return -1;
 220         }
 221         m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp));
 222         if (!m->ftab_temp) {
 223             mov_text_cleanup_ftab(m);
 224             return AVERROR(ENOMEM);
 225         }
 226         m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
 227         tx3g_ptr += 2;
 228         font_length = *tx3g_ptr++;
 229
 230         box_size = box_size + font_length;
 231         if (avctx->extradata_size < box_size) {
 232             mov_text_cleanup_ftab(m);
 233             m->ftab_entries = 0;
 234             return -1;
 235         }
 236         m->ftab_temp->font = av_malloc(font_length + 1);
 237         if (!m->ftab_temp->font) {
 238             mov_text_cleanup_ftab(m);
 239             return AVERROR(ENOMEM);
 240         }
 241         memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
 242         m->ftab_temp->font[font_length] = '\0';
 243         av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
 244         if (!m->ftab) {
 245             mov_text_cleanup_ftab(m);
 246             return AVERROR(ENOMEM);
 247         }
 248         m->ftab_temp = NULL;
 249         tx3g_ptr = tx3g_ptr + font_length;
 250     }
 251     for (i = 0; i < m->ftab_entries; i++) {
 252         if (style_fontID == m->ftab[i]->fontID)
 253             m->d.font = m->ftab[i]->font;
 254     }
 255     return 0;
 256 }
 257
 258 static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 259 {
 260     m->box_flags |= TWRP_BOX;
 261     m->w.wrap_flag = *tsmb++;
 262     return 0;
 263 }
 264
 265 static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 266 {
 267     m->box_flags |= HLIT_BOX;
 268     m->h.hlit_start = AV_RB16(tsmb);
 269     tsmb += 2;
 270     m->h.hlit_end = AV_RB16(tsmb);
 271     tsmb += 2;
 272     return 0;
 273 }
 274
 275 static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 276 {
 277     m->box_flags |= HCLR_BOX;
 278     memcpy(m->c.hlit_color, tsmb, 4);
 279     tsmb += 4;
 280     return 0;
 281 }
 282
 283 static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 284 {
 285     int i;
 286     int style_entries = AV_RB16(tsmb);
 287     tsmb += 2;
 288     // A single style record is of length 12 bytes.
 289     if (m->tracksize + m->size_var + 2 + style_entries * 12 > avpkt->size)
 290         return -1;
 291
 292     m->style_entries = style_entries;
 293
 294     m->box_flags |= STYL_BOX;
 295     for(i = 0; i < m->style_entries; i++) {
 296         m->s_temp = av_malloc(sizeof(*m->s_temp));
 297         if (!m->s_temp) {
 298             mov_text_cleanup(m);
 299             return AVERROR(ENOMEM);
 300         }
 301         m->s_temp->style_start = AV_RB16(tsmb);
 302         tsmb += 2;
 303         m->s_temp->style_end = AV_RB16(tsmb);
 304
 305         if (   m->s_temp->style_end < m->s_temp->style_start
 306             || (m->count_s && m->s_temp->style_start < m->s[m->count_s - 1]->style_end)) {
 307             av_freep(&m->s_temp);
 308             mov_text_cleanup(m);
 309             return AVERROR(ENOMEM);
 310         }
 311
 312         tsmb += 2;
 313         m->s_temp->style_fontID = AV_RB16(tsmb);
 314         tsmb += 2;
 315         m->s_temp->style_flag = AV_RB8(tsmb);
 316         tsmb++;
 317         m->s_temp->fontsize = AV_RB8(tsmb);
 318         av_dynarray_add(&m->s, &m->count_s, m->s_temp);
 319         if(!m->s) {
 320             mov_text_cleanup(m);
 321             return AVERROR(ENOMEM);
 322         }
 323         tsmb++;
 324         // text-color-rgba
 325         tsmb += 4;
 326     }
 327     return 0;
 328 }
 329
 330 static const Box box_types[] = {
 331     { MKBETAG('s','t','y','l'), 2, decode_styl },
 332     { MKBETAG('h','l','i','t'), 4, decode_hlit },
 333     { MKBETAG('h','c','l','r'), 4, decode_hclr },
 334     { MKBETAG('t','w','r','p'), 1, decode_twrp }
 335 };
 336
 337 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
 338
 339 // Return byte length of the UTF-8 sequence starting at text[0]. 0 on error.
 340 static int get_utf8_length_at(const char *text, const char *text_end)
 341 {
 342     const char *start = text;
 343     int err = 0;
 344     uint32_t c;
 345     GET_UTF8(c, text < text_end ? (uint8_t)*text++ : (err = 1, 0), goto error;);
 346     if (err)
 347         goto error;
 348     return text - start;
 349 error:
 350     return 0;
 351 }
 352
 353 static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
 354                        AVCodecContext *avctx)
 355 {
 356     MovTextContext *m = avctx->priv_data;
 357     int i = 0;
 358     int text_pos = 0;
 359     int style_active = 0;
 360     int entry = 0;
 361
 362     if (text < text_end && m->box_flags & TWRP_BOX) {
 363         if (m->w.wrap_flag == 1) {
 364             av_bprintf(buf, "{\\q1}"); /* End of line wrap */
 365         } else {
 366             av_bprintf(buf, "{\\q2}"); /* No wrap */
 367         }
 368     }
 369
 370     while (text < text_end) {
 371         int len;
 372
 373         if ((m->box_flags & STYL_BOX) && entry < m->style_entries) {
 374             if (text_pos == m->s[entry]->style_start) {
 375                 style_active = 1;
 376                 if (m->s[entry]->style_flag & STYLE_FLAG_BOLD)
 377                     av_bprintf(buf, "{\\b1}");
 378                 if (m->s[entry]->style_flag & STYLE_FLAG_ITALIC)
 379                     av_bprintf(buf, "{\\i1}");
 380                 if (m->s[entry]->style_flag & STYLE_FLAG_UNDERLINE)
 381                     av_bprintf(buf, "{\\u1}");
 382                 av_bprintf(buf, "{\\fs%d}", m->s[entry]->fontsize);
 383                 for (i = 0; i < m->ftab_entries; i++) {
 384                     if (m->s[entry]->style_fontID == m->ftab[i]->fontID)
 385                         av_bprintf(buf, "{\\fn%s}", m->ftab[i]->font);
 386                 }
 387             }
 388             if (text_pos == m->s[entry]->style_end) {
 389                 if (style_active) {
 390                     av_bprintf(buf, "{\\r}");
 391                     style_active = 0;
 392                 }
 393                 entry++;
 394             }
 395         }
 396         if (m->box_flags & HLIT_BOX) {
 397             if (text_pos == m->h.hlit_start) {
 398                 /* If hclr box is present, set the secondary color to the color
 399                  * specified. Otherwise, set primary color to white and secondary
 400                  * color to black. These colors will come from TextSampleModifier
 401                  * boxes in future and inverse video technique for highlight will
 402                  * be implemented.
 403                  */
 404                 if (m->box_flags & HCLR_BOX) {
 405                     av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
 406                                 m->c.hlit_color[1], m->c.hlit_color[0]);
 407                 } else {
 408                     av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
 409                 }
 410             }
 411             if (text_pos == m->h.hlit_end) {
 412                 if (m->box_flags & HCLR_BOX) {
 413                     av_bprintf(buf, "{\\2c&H000000&}");
 414                 } else {
 415                     av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
 416                 }
 417             }
 418         }
 419
 420         len = get_utf8_length_at(text, text_end);
 421         if (len < 1) {
 422             av_log(avctx, AV_LOG_ERROR, "invalid UTF-8 byte in subtitle\n");
 423             len = 1;
 424         }
 425         for (i = 0; i < len; i++) {
 426             switch (*text) {
 427             case '\r':
 428                 break;
 429             case '\n':
 430                 av_bprintf(buf, "\\N");
 431                 break;
 432             default:
 433                 av_bprint_chars(buf, *text, 1);
 434                 break;
 435             }
 436             text++;
 437         }
 438         text_pos++;
 439     }
 440
 441     return 0;
 442 }
 443
 444 static int mov_text_init(AVCodecContext *avctx) {
 445     /*
 446      * TODO: Handle the default text style.
 447      * NB: Most players ignore styles completely, with the result that
 448      * it's very common to find files where the default style is broken
 449      * and respecting it results in a worse experience than ignoring it.
 450      */
 451     int ret;
 452     MovTextContext *m = avctx->priv_data;
 453     ret = mov_text_tx3g(avctx, m);
 454     if (ret == 0) {
 455         return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize,
 456                     RGB_TO_BGR(m->d.color),
 457                     RGB_TO_BGR(m->d.back_color),
 458                     m->d.bold, m->d.italic, m->d.underline,
 459                     ASS_DEFAULT_BORDERSTYLE, m->d.alignment);
 460     } else
 461         return ff_ass_subtitle_header_default(avctx);
 462 }
 463
 464 static int mov_text_decode_frame(AVCodecContext *avctx,
 465                             void *data, int *got_sub_ptr, AVPacket *avpkt)
 466 {
 467     AVSubtitle *sub = data;
 468     MovTextContext *m = avctx->priv_data;
 469     int ret;
 470     AVBPrint buf;
 471     char *ptr = avpkt->data;
 472     char *end;
 473     int text_length, tsmb_type, ret_tsmb;
 474     uint64_t tsmb_size;
 475     const uint8_t *tsmb;
 476     size_t i;
 477
 478     if (!ptr || avpkt->size < 2)
 479         return AVERROR_INVALIDDATA;
 480
 481     /*
 482      * A packet of size two with value zero is an empty subtitle
 483      * used to mark the end of the previous non-empty subtitle.
 484      * We can just drop them here as we have duration information
 485      * already. If the value is non-zero, then it's technically a
 486      * bad packet.
 487      */
 488     if (avpkt->size == 2)
 489         return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
 490
 491     /*
 492      * The first two bytes of the packet are the length of the text string
 493      * In complex cases, there are style descriptors appended to the string
 494      * so we can't just assume the packet size is the string size.
 495      */
 496     text_length = AV_RB16(ptr);
 497     end = ptr + FFMIN(2 + text_length, avpkt->size);
 498     ptr += 2;
 499
 500     mov_text_cleanup(m);
 501
 502     tsmb_size = 0;
 503     m->tracksize = 2 + text_length;
 504     m->style_entries = 0;
 505     m->box_flags = 0;
 506     m->count_s = 0;
 507     // Note that the spec recommends lines be no longer than 2048 characters.
 508     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 509     if (text_length + 2 != avpkt->size) {
 510         while (m->tracksize + 8 <= avpkt->size) {
 511             // A box is a minimum of 8 bytes.
 512             tsmb = ptr + m->tracksize - 2;
 513             tsmb_size = AV_RB32(tsmb);
 514             tsmb += 4;
 515             tsmb_type = AV_RB32(tsmb);
 516             tsmb += 4;
 517
 518             if (tsmb_size == 1) {
 519                 if (m->tracksize + 16 > avpkt->size)
 520                     break;
 521                 tsmb_size = AV_RB64(tsmb);
 522                 tsmb += 8;
 523                 m->size_var = 16;
 524             } else
 525                 m->size_var = 8;
 526             //size_var is equal to 8 or 16 depending on the size of box
 527
 528             if (tsmb_size == 0) {
 529                 av_log(avctx, AV_LOG_ERROR, "tsmb_size is 0\n");
 530                 return AVERROR_INVALIDDATA;
 531             }
 532
 533             if (tsmb_size > avpkt->size - m->tracksize)
 534                 break;
 535
 536             for (i = 0; i < box_count; i++) {
 537                 if (tsmb_type == box_types[i].type) {
 538                     if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
 539                         break;
 540                     ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
 541                     if (ret_tsmb == -1)
 542                         break;
 543                 }
 544             }
 545             m->tracksize = m->tracksize + tsmb_size;
 546         }
 547         text_to_ass(&buf, ptr, end, avctx);
 548         mov_text_cleanup(m);
 549     } else
 550         text_to_ass(&buf, ptr, end, avctx);
 551
 552     ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL);
 553     av_bprint_finalize(&buf, NULL);
 554     if (ret < 0)
 555         return ret;
 556     *got_sub_ptr = sub->num_rects > 0;
 557     return avpkt->size;
 558 }
 559
 560 static int mov_text_decode_close(AVCodecContext *avctx)
 561 {
 562     MovTextContext *m = avctx->priv_data;
 563     mov_text_cleanup_ftab(m);
 564     mov_text_cleanup(m);
 565     return 0;
 566 }
 567
 568 static void mov_text_flush(AVCodecContext *avctx)
 569 {
 570     MovTextContext *m = avctx->priv_data;
 571     if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
 572         m->readorder = 0;
 573 }
 574
 575 AVCodec ff_movtext_decoder = {
 576     .name         = "mov_text",
 577     .long_name    = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
 578     .type         = AVMEDIA_TYPE_SUBTITLE,
 579     .id           = AV_CODEC_ID_MOV_TEXT,
 580     .priv_data_size = sizeof(MovTextContext),
 581     .init         = mov_text_init,
 582     .decode       = mov_text_decode_frame,
 583     .close        = mov_text_decode_close,
 584     .flush        = mov_text_flush,
 585 };