git.sesse.net Git - ffmpeg/blob - libavcodec/movtextdec.c

   1 /*
   2  * 3GPP TS 26.245 Timed Text decoder
   3  * Copyright (c) 2012  Philip Langdale <philipl@overt.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "avcodec.h"
  23 #include "ass.h"
  24 #include "libavutil/avstring.h"
  25 #include "libavutil/common.h"
  26 #include "libavutil/bprint.h"
  27 #include "libavutil/intreadwrite.h"
  28 #include "libavutil/mem.h"
  29
/* Bits of the face-style-flags byte found in style records. */
#define STYLE_FLAG_BOLD         (1<<0)
#define STYLE_FLAG_ITALIC       (1<<1)
#define STYLE_FLAG_UNDERLINE    (1<<2)
  33
/* Number of tx3g extradata bytes up to and including the font table entry
 * count; extradata shorter than this is rejected by mov_text_tx3g(). */
#define BOX_SIZE_INITIAL    40
  35
/* Bits of MovTextContext.box_flags, set by the matching decode_*() callback
 * when that modifier box is parsed from the current packet. */
#define STYL_BOX   (1<<0)
#define HLIT_BOX   (1<<1)
#define HCLR_BOX   (1<<2)
#define TWRP_BOX   (1<<3)
  40
/* Values stored in MovTextDefault.alignment and handed to
 * ff_ass_subtitle_header().
 * NOTE(review): these look like ASS numpad-style alignment codes - confirm
 * against ass.h. */
#define BOTTOM_LEFT     1
#define BOTTOM_CENTER   2
#define BOTTOM_RIGHT    3
#define MIDDLE_LEFT     4
#define MIDDLE_CENTER   5
#define MIDDLE_RIGHT    6
#define TOP_LEFT        7
#define TOP_CENTER      8
#define TOP_RIGHT       9
  50
/* Default text style read from the tx3g extradata by mov_text_tx3g(). */
typedef struct {
    char *font;      /* points at a FontRecord.font string; not owned here */
    int fontsize;
    int color;       /* primary colour, read as a 24-bit big-endian value */
    int back_color;  /* background colour, read as a 24-bit big-endian value */
    int bold;        /* non-zero when STYLE_FLAG_BOLD is set */
    int italic;      /* non-zero when STYLE_FLAG_ITALIC is set */
    int underline;   /* non-zero when STYLE_FLAG_UNDERLINE is set */
    int alignment;   /* one of the BOTTOM_x / MIDDLE_x / TOP_x values */
} MovTextDefault;
  61
/* One entry of the font table found in the tx3g extradata. */
typedef struct {
    uint16_t fontID;
    char *font;      /* heap-allocated, NUL-terminated copy of the font name */
} FontRecord;
  66
/* One style record of a 'styl' box. */
typedef struct {
    uint16_t style_start;   /* first character position the style applies to */
    uint16_t style_end;     /* character position at which the style is reset */
    uint8_t style_flag;     /* combination of STYLE_FLAG_* bits */
    uint8_t fontsize;
    uint16_t style_fontID;  /* matched against FontRecord.fontID */
} StyleBox;
  74
/* Payload of a 'hlit' box: the character range to highlight. */
typedef struct {
    uint16_t hlit_start;
    uint16_t hlit_end;
} HighlightBox;
  79
/* Payload of a 'hclr' box: four raw colour bytes; text_to_ass() emits
 * bytes 2, 1, 0 (in that order) as the ASS secondary colour. */
typedef struct {
   uint8_t hlit_color[4];
} HilightcolorBox;
  83
/* Payload of a 'twrp' box: 1 selects "{\q1}" in the output, any other
 * value selects "{\q2}". */
typedef struct {
    uint8_t wrap_flag;
} TextWrapBox;
  87
/* Decoder private state: font table and defaults from the extradata plus
 * the modifier boxes parsed from the packet currently being decoded. */
typedef struct {
    StyleBox **s;            /* style records of the current packet (count_s elements) */
    StyleBox *s_temp;        /* record being filled in by decode_styl() */
    HighlightBox h;          /* valid when box_flags has HLIT_BOX */
    HilightcolorBox c;       /* valid when box_flags has HCLR_BOX */
    FontRecord **ftab;       /* font table from the extradata (count_f elements) */
    FontRecord *ftab_temp;   /* record being filled in by mov_text_tx3g() */
    TextWrapBox w;           /* valid when box_flags has TWRP_BOX */
    MovTextDefault d;        /* default style from the extradata */
    uint8_t box_flags;       /* combination of STYL_BOX / HLIT_BOX / HCLR_BOX / TWRP_BOX */
    uint16_t style_entries, ftab_entries; /* entry counts as read from the bitstream */
    uint64_t tracksize;      /* byte offset in the packet of the box being parsed */
    int size_var;            /* header size of the box being parsed: 8 or 16 */
    int count_s, count_f;    /* element counts of s and ftab */
    int readorder;           /* passed to ff_ass_add_rect(), incremented per subtitle */
} MovTextContext;
 104
/* Descriptor of one supported text sample modifier box. */
typedef struct {
    uint32_t type;      /* big-endian four-character box type */
    size_t base_size;   /* payload bytes that must be present before decode() is called */
    int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
} Box;
 110
 111 static void mov_text_cleanup(MovTextContext *m)
 112 {
 113     int i;
 114     if (m->box_flags & STYL_BOX) {
 115         for(i = 0; i < m->count_s; i++) {
 116             av_freep(&m->s[i]);
 117         }
 118         av_freep(&m->s);
 119         m->count_s = 0;
 120         m->style_entries = 0;
 121     }
 122 }
 123
 124 static void mov_text_cleanup_ftab(MovTextContext *m)
 125 {
 126     int i;
 127     if (m->ftab_temp)
 128         av_freep(&m->ftab_temp->font);
 129     av_freep(&m->ftab_temp);
 130     if (m->ftab) {
 131         for(i = 0; i < m->count_f; i++) {
 132             av_freep(&m->ftab[i]->font);
 133             av_freep(&m->ftab[i]);
 134         }
 135     }
 136     av_freep(&m->ftab);
 137 }
 138
 139 static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
 140 {
 141     uint8_t *tx3g_ptr = avctx->extradata;
 142     int i, box_size, font_length;
 143     int8_t v_align, h_align;
 144     int style_fontID;
 145     StyleBox s_default;
 146
 147     m->count_f = 0;
 148     m->ftab_entries = 0;
 149     box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
 150     if (avctx->extradata_size < box_size)
 151         return -1;
 152
 153     // Display Flags
 154     tx3g_ptr += 4;
 155     // Alignment
 156     h_align = *tx3g_ptr++;
 157     v_align = *tx3g_ptr++;
 158     if (h_align == 0) {
 159         if (v_align == 0)
 160             m->d.alignment = TOP_LEFT;
 161         if (v_align == 1)
 162             m->d.alignment = MIDDLE_LEFT;
 163         if (v_align == -1)
 164             m->d.alignment = BOTTOM_LEFT;
 165     }
 166     if (h_align == 1) {
 167         if (v_align == 0)
 168             m->d.alignment = TOP_CENTER;
 169         if (v_align == 1)
 170             m->d.alignment = MIDDLE_CENTER;
 171         if (v_align == -1)
 172             m->d.alignment = BOTTOM_CENTER;
 173     }
 174     if (h_align == -1) {
 175         if (v_align == 0)
 176             m->d.alignment = TOP_RIGHT;
 177         if (v_align == 1)
 178             m->d.alignment = MIDDLE_RIGHT;
 179         if (v_align == -1)
 180             m->d.alignment = BOTTOM_RIGHT;
 181     }
 182     // Background Color
 183     m->d.back_color = AV_RB24(tx3g_ptr);
 184     tx3g_ptr += 4;
 185     // BoxRecord
 186     tx3g_ptr += 8;
 187     // StyleRecord
 188     tx3g_ptr += 4;
 189     // fontID
 190     style_fontID = AV_RB16(tx3g_ptr);
 191     tx3g_ptr += 2;
 192     // face-style-flags
 193     s_default.style_flag = *tx3g_ptr++;
 194     m->d.bold = s_default.style_flag & STYLE_FLAG_BOLD;
 195     m->d.italic = s_default.style_flag & STYLE_FLAG_ITALIC;
 196     m->d.underline = s_default.style_flag & STYLE_FLAG_UNDERLINE;
 197     // fontsize
 198     m->d.fontsize = *tx3g_ptr++;
 199     // Primary color
 200     m->d.color = AV_RB24(tx3g_ptr);
 201     tx3g_ptr += 4;
 202     // FontRecord
 203     // FontRecord Size
 204     tx3g_ptr += 4;
 205     // ftab
 206     tx3g_ptr += 4;
 207
 208     m->ftab_entries = AV_RB16(tx3g_ptr);
 209     tx3g_ptr += 2;
 210
 211     for (i = 0; i < m->ftab_entries; i++) {
 212
 213         box_size += 3;
 214         if (avctx->extradata_size < box_size) {
 215             mov_text_cleanup_ftab(m);
 216             m->ftab_entries = 0;
 217             return -1;
 218         }
 219         m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp));
 220         if (!m->ftab_temp) {
 221             mov_text_cleanup_ftab(m);
 222             return AVERROR(ENOMEM);
 223         }
 224         m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
 225         tx3g_ptr += 2;
 226         font_length = *tx3g_ptr++;
 227
 228         box_size = box_size + font_length;
 229         if (avctx->extradata_size < box_size) {
 230             mov_text_cleanup_ftab(m);
 231             m->ftab_entries = 0;
 232             return -1;
 233         }
 234         m->ftab_temp->font = av_malloc(font_length + 1);
 235         if (!m->ftab_temp->font) {
 236             mov_text_cleanup_ftab(m);
 237             return AVERROR(ENOMEM);
 238         }
 239         memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
 240         m->ftab_temp->font[font_length] = '\0';
 241         av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
 242         if (!m->ftab) {
 243             mov_text_cleanup_ftab(m);
 244             return AVERROR(ENOMEM);
 245         }
 246         m->ftab_temp = NULL;
 247         tx3g_ptr = tx3g_ptr + font_length;
 248     }
 249     for (i = 0; i < m->ftab_entries; i++) {
 250         if (style_fontID == m->ftab[i]->fontID)
 251             m->d.font = m->ftab[i]->font;
 252     }
 253     return 0;
 254 }
 255
 256 static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 257 {
 258     m->box_flags |= TWRP_BOX;
 259     m->w.wrap_flag = *tsmb++;
 260     return 0;
 261 }
 262
 263 static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 264 {
 265     m->box_flags |= HLIT_BOX;
 266     m->h.hlit_start = AV_RB16(tsmb);
 267     tsmb += 2;
 268     m->h.hlit_end = AV_RB16(tsmb);
 269     tsmb += 2;
 270     return 0;
 271 }
 272
 273 static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 274 {
 275     m->box_flags |= HCLR_BOX;
 276     memcpy(m->c.hlit_color, tsmb, 4);
 277     tsmb += 4;
 278     return 0;
 279 }
 280
 281 static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 282 {
 283     int i;
 284     int style_entries = AV_RB16(tsmb);
 285     tsmb += 2;
 286     // A single style record is of length 12 bytes.
 287     if (m->tracksize + m->size_var + 2 + style_entries * 12 > avpkt->size)
 288         return -1;
 289
 290     m->style_entries = style_entries;
 291
 292     m->box_flags |= STYL_BOX;
 293     for(i = 0; i < m->style_entries; i++) {
 294         m->s_temp = av_malloc(sizeof(*m->s_temp));
 295         if (!m->s_temp) {
 296             mov_text_cleanup(m);
 297             return AVERROR(ENOMEM);
 298         }
 299         m->s_temp->style_start = AV_RB16(tsmb);
 300         tsmb += 2;
 301         m->s_temp->style_end = AV_RB16(tsmb);
 302
 303         if (   m->s_temp->style_end < m->s_temp->style_start
 304             || (m->count_s && m->s_temp->style_start < m->s[m->count_s - 1]->style_end)) {
 305             av_freep(&m->s_temp);
 306             mov_text_cleanup(m);
 307             return AVERROR(ENOMEM);
 308         }
 309
 310         tsmb += 2;
 311         m->s_temp->style_fontID = AV_RB16(tsmb);
 312         tsmb += 2;
 313         m->s_temp->style_flag = AV_RB8(tsmb);
 314         tsmb++;
 315         m->s_temp->fontsize = AV_RB8(tsmb);
 316         av_dynarray_add(&m->s, &m->count_s, m->s_temp);
 317         if(!m->s) {
 318             mov_text_cleanup(m);
 319             return AVERROR(ENOMEM);
 320         }
 321         tsmb++;
 322         // text-color-rgba
 323         tsmb += 4;
 324     }
 325     return 0;
 326 }
 327
 328 static const Box box_types[] = {
 329     { MKBETAG('s','t','y','l'), 2, decode_styl },
 330     { MKBETAG('h','l','i','t'), 4, decode_hlit },
 331     { MKBETAG('h','c','l','r'), 4, decode_hclr },
 332     { MKBETAG('t','w','r','p'), 1, decode_twrp }
 333 };
 334
 335 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
 336
 337 // Return byte length of the UTF-8 sequence starting at text[0]. 0 on error.
 338 static int get_utf8_length_at(const char *text, const char *text_end)
 339 {
 340     const char *start = text;
 341     int err = 0;
 342     uint32_t c;
 343     GET_UTF8(c, text < text_end ? (uint8_t)*text++ : (err = 1, 0), goto error;);
 344     if (err)
 345         goto error;
 346     return text - start;
 347 error:
 348     return 0;
 349 }
 350
 351 static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
 352                        AVCodecContext *avctx)
 353 {
 354     MovTextContext *m = avctx->priv_data;
 355     int i = 0;
 356     int j = 0;
 357     int text_pos = 0;
 358
 359     if (text < text_end && m->box_flags & TWRP_BOX) {
 360         if (m->w.wrap_flag == 1) {
 361             av_bprintf(buf, "{\\q1}"); /* End of line wrap */
 362         } else {
 363             av_bprintf(buf, "{\\q2}"); /* No wrap */
 364         }
 365     }
 366
 367     while (text < text_end) {
 368         int len;
 369
 370         if (m->box_flags & STYL_BOX) {
 371             for (i = 0; i < m->style_entries; i++) {
 372                 if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
 373                     av_bprintf(buf, "{\\r}");
 374                 }
 375             }
 376             for (i = 0; i < m->style_entries; i++) {
 377                 if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
 378                     if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
 379                         av_bprintf(buf, "{\\b1}");
 380                     if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
 381                         av_bprintf(buf, "{\\i1}");
 382                     if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
 383                         av_bprintf(buf, "{\\u1}");
 384                     av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
 385                     for (j = 0; j < m->ftab_entries; j++) {
 386                         if (m->s[i]->style_fontID == m->ftab[j]->fontID)
 387                             av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
 388                     }
 389                 }
 390             }
 391         }
 392         if (m->box_flags & HLIT_BOX) {
 393             if (text_pos == m->h.hlit_start) {
 394                 /* If hclr box is present, set the secondary color to the color
 395                  * specified. Otherwise, set primary color to white and secondary
 396                  * color to black. These colors will come from TextSampleModifier
 397                  * boxes in future and inverse video technique for highlight will
 398                  * be implemented.
 399                  */
 400                 if (m->box_flags & HCLR_BOX) {
 401                     av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
 402                                 m->c.hlit_color[1], m->c.hlit_color[0]);
 403                 } else {
 404                     av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
 405                 }
 406             }
 407             if (text_pos == m->h.hlit_end) {
 408                 if (m->box_flags & HCLR_BOX) {
 409                     av_bprintf(buf, "{\\2c&H000000&}");
 410                 } else {
 411                     av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
 412                 }
 413             }
 414         }
 415
 416         len = get_utf8_length_at(text, text_end);
 417         if (len < 1) {
 418             av_log(avctx, AV_LOG_ERROR, "invalid UTF-8 byte in subtitle\n");
 419             len = 1;
 420         }
 421         for (i = 0; i < len; i++) {
 422             switch (*text) {
 423             case '\r':
 424                 break;
 425             case '\n':
 426                 av_bprintf(buf, "\\N");
 427                 break;
 428             default:
 429                 av_bprint_chars(buf, *text, 1);
 430                 break;
 431             }
 432             text++;
 433         }
 434         text_pos++;
 435     }
 436
 437     return 0;
 438 }
 439
 440 static int mov_text_init(AVCodecContext *avctx) {
 441     /*
 442      * TODO: Handle the default text style.
 443      * NB: Most players ignore styles completely, with the result that
 444      * it's very common to find files where the default style is broken
 445      * and respecting it results in a worse experience than ignoring it.
 446      */
 447     int ret;
 448     MovTextContext *m = avctx->priv_data;
 449     ret = mov_text_tx3g(avctx, m);
 450     if (ret == 0) {
 451         return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
 452                                 m->d.back_color, m->d.bold, m->d.italic,
 453                                 m->d.underline, ASS_DEFAULT_BORDERSTYLE,
 454                                 m->d.alignment);
 455     } else
 456         return ff_ass_subtitle_header_default(avctx);
 457 }
 458
 459 static int mov_text_decode_frame(AVCodecContext *avctx,
 460                             void *data, int *got_sub_ptr, AVPacket *avpkt)
 461 {
 462     AVSubtitle *sub = data;
 463     MovTextContext *m = avctx->priv_data;
 464     int ret;
 465     AVBPrint buf;
 466     char *ptr = avpkt->data;
 467     char *end;
 468     int text_length, tsmb_type, ret_tsmb;
 469     uint64_t tsmb_size;
 470     const uint8_t *tsmb;
 471     size_t i;
 472
 473     if (!ptr || avpkt->size < 2)
 474         return AVERROR_INVALIDDATA;
 475
 476     /*
 477      * A packet of size two with value zero is an empty subtitle
 478      * used to mark the end of the previous non-empty subtitle.
 479      * We can just drop them here as we have duration information
 480      * already. If the value is non-zero, then it's technically a
 481      * bad packet.
 482      */
 483     if (avpkt->size == 2)
 484         return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
 485
 486     /*
 487      * The first two bytes of the packet are the length of the text string
 488      * In complex cases, there are style descriptors appended to the string
 489      * so we can't just assume the packet size is the string size.
 490      */
 491     text_length = AV_RB16(ptr);
 492     end = ptr + FFMIN(2 + text_length, avpkt->size);
 493     ptr += 2;
 494
 495     mov_text_cleanup(m);
 496
 497     tsmb_size = 0;
 498     m->tracksize = 2 + text_length;
 499     m->style_entries = 0;
 500     m->box_flags = 0;
 501     m->count_s = 0;
 502     // Note that the spec recommends lines be no longer than 2048 characters.
 503     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 504     if (text_length + 2 != avpkt->size) {
 505         while (m->tracksize + 8 <= avpkt->size) {
 506             // A box is a minimum of 8 bytes.
 507             tsmb = ptr + m->tracksize - 2;
 508             tsmb_size = AV_RB32(tsmb);
 509             tsmb += 4;
 510             tsmb_type = AV_RB32(tsmb);
 511             tsmb += 4;
 512
 513             if (tsmb_size == 1) {
 514                 if (m->tracksize + 16 > avpkt->size)
 515                     break;
 516                 tsmb_size = AV_RB64(tsmb);
 517                 tsmb += 8;
 518                 m->size_var = 16;
 519             } else
 520                 m->size_var = 8;
 521             //size_var is equal to 8 or 16 depending on the size of box
 522
 523             if (tsmb_size == 0) {
 524                 av_log(avctx, AV_LOG_ERROR, "tsmb_size is 0\n");
 525                 return AVERROR_INVALIDDATA;
 526             }
 527
 528             if (tsmb_size > avpkt->size - m->tracksize)
 529                 break;
 530
 531             for (i = 0; i < box_count; i++) {
 532                 if (tsmb_type == box_types[i].type) {
 533                     if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
 534                         break;
 535                     ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
 536                     if (ret_tsmb == -1)
 537                         break;
 538                 }
 539             }
 540             m->tracksize = m->tracksize + tsmb_size;
 541         }
 542         text_to_ass(&buf, ptr, end, avctx);
 543         mov_text_cleanup(m);
 544     } else
 545         text_to_ass(&buf, ptr, end, avctx);
 546
 547     ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL);
 548     av_bprint_finalize(&buf, NULL);
 549     if (ret < 0)
 550         return ret;
 551     *got_sub_ptr = sub->num_rects > 0;
 552     return avpkt->size;
 553 }
 554
 555 static int mov_text_decode_close(AVCodecContext *avctx)
 556 {
 557     MovTextContext *m = avctx->priv_data;
 558     mov_text_cleanup_ftab(m);
 559     mov_text_cleanup(m);
 560     return 0;
 561 }
 562
 563 static void mov_text_flush(AVCodecContext *avctx)
 564 {
 565     MovTextContext *m = avctx->priv_data;
 566     if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
 567         m->readorder = 0;
 568 }
 569
/* Codec descriptor wiring the callbacks defined in this file to the
 * "mov_text" subtitle decoder. */
AVCodec ff_movtext_decoder = {
    .name         = "mov_text",
    .long_name    = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
    .type         = AVMEDIA_TYPE_SUBTITLE,
    .id           = AV_CODEC_ID_MOV_TEXT,
    .priv_data_size = sizeof(MovTextContext),
    .init         = mov_text_init,
    .decode       = mov_text_decode_frame,
    .close        = mov_text_decode_close,
    .flush        = mov_text_flush,
};