git.sesse.net Git - ffmpeg/blob - libavcodec/movtextdec.c

   1 /*
   2  * 3GPP TS 26.245 Timed Text decoder
   3  * Copyright (c) 2012  Philip Langdale <philipl@overt.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "avcodec.h"
  23 #include "ass.h"
  24 #include "libavutil/avstring.h"
  25 #include "libavutil/common.h"
  26 #include "libavutil/bprint.h"
  27 #include "libavutil/intreadwrite.h"
  28 #include "libavutil/mem.h"
  29
  30 #define STYLE_FLAG_BOLD         (1<<0)
  31 #define STYLE_FLAG_ITALIC       (1<<1)
  32 #define STYLE_FLAG_UNDERLINE    (1<<2)
  33
  34 #define STYL_BOX   (1<<0)
  35 #define HLIT_BOX   (1<<1)
  36 #define HCLR_BOX   (1<<2)
  37
  38 typedef struct {
  39     uint16_t style_start;
  40     uint16_t style_end;
  41     uint8_t style_flag;
  42     uint8_t fontsize;
  43 } StyleBox;
  44
  45 typedef struct {
  46     uint16_t hlit_start;
  47     uint16_t hlit_end;
  48 } HighlightBox;
  49
  50 typedef struct {
  51    uint8_t hlit_color[4];
  52 } HilightcolorBox;
  53
  54 typedef struct {
  55     StyleBox **s;
  56     StyleBox *s_temp;
  57     HighlightBox h;
  58     HilightcolorBox c;
  59     uint8_t box_flags;
  60     uint16_t style_entries;
  61     uint64_t tracksize;
  62     int size_var;
  63     int count_s;
  64 } MovTextContext;
  65
  66 typedef struct {
  67     uint32_t type;
  68     size_t base_size;
  69     int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
  70 } Box;
  71
  72 static void mov_text_cleanup(MovTextContext *m)
  73 {
  74     int i;
  75     if (m->box_flags & STYL_BOX) {
  76         for(i = 0; i < m->count_s; i++) {
  77             av_freep(&m->s[i]);
  78         }
  79         av_freep(&m->s);
  80     }
  81 }
  82
  83 static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  84 {
  85     m->box_flags |= HLIT_BOX;
  86     m->h.hlit_start = AV_RB16(tsmb);
  87     tsmb += 2;
  88     m->h.hlit_end = AV_RB16(tsmb);
  89     tsmb += 2;
  90     return 0;
  91 }
  92
  93 static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  94 {
  95     m->box_flags |= HCLR_BOX;
  96     memcpy(m->c.hlit_color, tsmb, 4);
  97     tsmb += 4;
  98     return 0;
  99 }
 100
 101 static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
 102 {
 103     int i;
 104     m->style_entries = AV_RB16(tsmb);
 105     tsmb += 2;
 106     // A single style record is of length 12 bytes.
 107     if (m->tracksize + m->size_var + 2 + m->style_entries * 12 > avpkt->size)
 108         return -1;
 109
 110     m->box_flags |= STYL_BOX;
 111     for(i = 0; i < m->style_entries; i++) {
 112         m->s_temp = av_malloc(sizeof(*m->s_temp));
 113         if (!m->s_temp) {
 114             mov_text_cleanup(m);
 115             return AVERROR(ENOMEM);
 116         }
 117         m->s_temp->style_start = AV_RB16(tsmb);
 118         tsmb += 2;
 119         m->s_temp->style_end = AV_RB16(tsmb);
 120         tsmb += 2;
 121         // fontID = AV_RB16(tsmb);
 122         tsmb += 2;
 123         m->s_temp->style_flag = AV_RB8(tsmb);
 124         tsmb++;
 125         m->s_temp->fontsize = AV_RB8(tsmb);
 126         av_dynarray_add(&m->s, &m->count_s, m->s_temp);
 127         if(!m->s) {
 128             mov_text_cleanup(m);
 129             return AVERROR(ENOMEM);
 130         }
 131         tsmb++;
 132         // text-color-rgba
 133         tsmb += 4;
 134     }
 135     return 0;
 136 }
 137
 138 static const Box box_types[] = {
 139     { MKBETAG('s','t','y','l'), 2, decode_styl },
 140     { MKBETAG('h','l','i','t'), 4, decode_hlit },
 141     { MKBETAG('h','c','l','r'), 4, decode_hclr }
 142 };
 143
 144 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
 145
 146 static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
 147                         MovTextContext *m)
 148 {
 149     int i = 0;
 150     int text_pos = 0;
 151     while (text < text_end) {
 152         if (m->box_flags & STYL_BOX) {
 153             for (i = 0; i < m->style_entries; i++) {
 154                 if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
 155                     av_bprintf(buf, "{\\r}");
 156                 }
 157             }
 158             for (i = 0; i < m->style_entries; i++) {
 159                 if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
 160                     if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
 161                         av_bprintf(buf, "{\\b1}");
 162                     if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
 163                         av_bprintf(buf, "{\\i1}");
 164                     if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
 165                         av_bprintf(buf, "{\\u1}");
 166                     av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
 167                 }
 168             }
 169         }
 170         if (m->box_flags & HLIT_BOX) {
 171             if (text_pos == m->h.hlit_start) {
 172                 /* If hclr box is present, set the secondary color to the color
 173                  * specified. Otherwise, set primary color to white and secondary
 174                  * color to black. These colors will come from TextSampleModifier
 175                  * boxes in future and inverse video technique for highlight will
 176                  * be implemented.
 177                  */
 178                 if (m->box_flags & HCLR_BOX) {
 179                     av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
 180                                 m->c.hlit_color[1], m->c.hlit_color[0]);
 181                 } else {
 182                     av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
 183                 }
 184             }
 185             if (text_pos == m->h.hlit_end) {
 186                 if (m->box_flags & HCLR_BOX) {
 187                     av_bprintf(buf, "{\\2c&H000000&}");
 188                 } else {
 189                     av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
 190                 }
 191             }
 192         }
 193
 194         switch (*text) {
 195         case '\r':
 196             break;
 197         case '\n':
 198             av_bprintf(buf, "\\N");
 199             break;
 200         default:
 201             av_bprint_chars(buf, *text, 1);
 202             break;
 203         }
 204         text++;
 205         text_pos++;
 206     }
 207
 208     return 0;
 209 }
 210
 211 static int mov_text_init(AVCodecContext *avctx) {
 212     /*
 213      * TODO: Handle the default text style.
 214      * NB: Most players ignore styles completely, with the result that
 215      * it's very common to find files where the default style is broken
 216      * and respecting it results in a worse experience than ignoring it.
 217      */
 218     return ff_ass_subtitle_header_default(avctx);
 219 }
 220
 221 static int mov_text_decode_frame(AVCodecContext *avctx,
 222                             void *data, int *got_sub_ptr, AVPacket *avpkt)
 223 {
 224     AVSubtitle *sub = data;
 225     MovTextContext *m = avctx->priv_data;
 226     int ret, ts_start, ts_end;
 227     AVBPrint buf;
 228     char *ptr = avpkt->data;
 229     char *end;
 230     int text_length, tsmb_type, ret_tsmb;
 231     uint64_t tsmb_size;
 232     const uint8_t *tsmb;
 233
 234     if (!ptr || avpkt->size < 2)
 235         return AVERROR_INVALIDDATA;
 236
 237     /*
 238      * A packet of size two with value zero is an empty subtitle
 239      * used to mark the end of the previous non-empty subtitle.
 240      * We can just drop them here as we have duration information
 241      * already. If the value is non-zero, then it's technically a
 242      * bad packet.
 243      */
 244     if (avpkt->size == 2)
 245         return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
 246
 247     /*
 248      * The first two bytes of the packet are the length of the text string
 249      * In complex cases, there are style descriptors appended to the string
 250      * so we can't just assume the packet size is the string size.
 251      */
 252     text_length = AV_RB16(ptr);
 253     end = ptr + FFMIN(2 + text_length, avpkt->size);
 254     ptr += 2;
 255
 256     ts_start = av_rescale_q(avpkt->pts,
 257                             avctx->time_base,
 258                             (AVRational){1,100});
 259     ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
 260                             avctx->time_base,
 261                             (AVRational){1,100});
 262
 263     tsmb_size = 0;
 264     m->tracksize = 2 + text_length;
 265     m->style_entries = 0;
 266     m->box_flags = 0;
 267     m->count_s = 0;
 268     // Note that the spec recommends lines be no longer than 2048 characters.
 269     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 270     if (text_length + 2 != avpkt->size) {
 271         while (m->tracksize + 8 <= avpkt->size) {
 272             // A box is a minimum of 8 bytes.
 273             tsmb = ptr + m->tracksize - 2;
 274             tsmb_size = AV_RB32(tsmb);
 275             tsmb += 4;
 276             tsmb_type = AV_RB32(tsmb);
 277             tsmb += 4;
 278
 279             if (tsmb_size == 1) {
 280                 if (m->tracksize + 16 > avpkt->size)
 281                     break;
 282                 tsmb_size = AV_RB64(tsmb);
 283                 tsmb += 8;
 284                 m->size_var = 16;
 285             } else
 286                 m->size_var = 8;
 287             //size_var is equal to 8 or 16 depending on the size of box
 288
 289             if (m->tracksize + tsmb_size > avpkt->size)
 290                 break;
 291
 292             for (size_t i = 0; i < box_count; i++) {
 293                 if (tsmb_type == box_types[i].type) {
 294                     if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
 295                         break;
 296                     ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
 297                     if (ret_tsmb == -1)
 298                         break;
 299                 }
 300             }
 301             m->tracksize = m->tracksize + tsmb_size;
 302         }
 303         text_to_ass(&buf, ptr, end, m);
 304         mov_text_cleanup(m);
 305     } else
 306         text_to_ass(&buf, ptr, end, m);
 307
 308     ret = ff_ass_add_rect_bprint(sub, &buf, ts_start, ts_end - ts_start);
 309     av_bprint_finalize(&buf, NULL);
 310     if (ret < 0)
 311         return ret;
 312     *got_sub_ptr = sub->num_rects > 0;
 313     return avpkt->size;
 314 }
 315
 316 AVCodec ff_movtext_decoder = {
 317     .name         = "mov_text",
 318     .long_name    = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
 319     .type         = AVMEDIA_TYPE_SUBTITLE,
 320     .id           = AV_CODEC_ID_MOV_TEXT,
 321     .priv_data_size = sizeof(MovTextContext),
 322     .init         = mov_text_init,
 323     .decode       = mov_text_decode_frame,
 324 };