3 * Copyright (c) 2003 Fabrice Bellard
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/avstring.h"
25 #include "libavutil/intreadwrite.h"
26 #include "libavutil/dict.h"
27 #include "avio_internal.h"
/*
 * Test whether buf starts with an ID3v2 header introduced by magic.
 * Only part of the condition chain is visible in this excerpt: the first
 * byte must equal magic[0] and the top bit of bytes 6, 7 and 8 must be
 * clear. The remaining comparisons are on lines not shown here.
 */
29 int ff_id3v2_match(const uint8_t *buf, const char * magic)
31 return buf[0] == magic[0] &&
/* bytes 6..8 are size bytes (read with 0x7f masks elsewhere in this file);
 * a header with the top bit set in any of them is rejected */
36 (buf[6] & 0x80) == 0 &&
37 (buf[7] & 0x80) == 0 &&
38 (buf[8] & 0x80) == 0 &&
/*
 * Compute a tag length from the ID3v2 header bytes in buf.
 * The size is assembled from the low 7 bits of each size byte, most
 * significant byte first, and ID3v2_HEADER_SIZE is added to the result.
 * NOTE(review): the last term of the sum, any condition guarding the
 * header-size addition below and the return statement are on lines not
 * visible in this excerpt -- confirm against the full file.
 */
42 int ff_id3v2_tag_len(const uint8_t * buf)
/* each size byte contributes 7 bits: shifts step down by 7 */
44 int len = ((buf[6] & 0x7f) << 21) +
45 ((buf[7] & 0x7f) << 14) +
46 ((buf[8] & 0x7f) << 7) +
50 len += ID3v2_HEADER_SIZE;
/*
 * Read a size value from s in which every byte contributes its low 7 bits.
 * NOTE(review): the loop header (presumably iterating len times) and the
 * return statement are not visible in this excerpt.
 */
54 static unsigned int get_size(AVIOContext *s, int len)
/* make room for 7 more bits and append the low 7 bits of the next byte */
58 v = (v << 7) + (avio_r8(s) & 0x7F);
63 * Free GEOB type extra metadata.
/*
 * Release the memory held by a GEOB extra-metadata struct.
 * The three string members are freed below.
 * NOTE(review): further frees (e.g. of the binary payload or the struct
 * itself) may follow on lines not visible in this excerpt.
 */
65 static void free_geobtag(ID3v2ExtraMetaGEOB *geob)
67 av_free(geob->mime_type);
68 av_free(geob->file_name);
69 av_free(geob->description);
75 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
76 * always null terminated. Stop reading when either *maxread bytes are read from
77 * pb or U+0000 character is found.
79 * @param dst Pointer where the address of the buffer with the decoded bytes is
80 * stored. Buffer must be freed by caller.
81 * @param maxread Pointer to maximum number of bytes to read from the
82 * AVIOContext. After execution the value is decremented by the number of bytes
84 * @returns 0 if no error occurred, dst is uninitialized on error
/*
 * Decode one string from pb and return it as UTF-8 through *dst.
 * The output is collected in a dynamic memory stream (dynbuf) and handed
 * over to *dst when that stream is closed. The encoding argument selects
 * the branch of the switch below.
 * NOTE(review): local declarations, loop headers, break statements, the
 * final update of *maxread and the return are on lines not visible in this
 * excerpt.
 */
86 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding,
87 uint8_t **dst, int *maxread)
/* 16-bit reader used by the UTF-16 loop; starts out big-endian and is
 * presumably switched by the BOM handling below -- TODO confirm */
93 unsigned int (*get)(AVIOContext*) = avio_rb16;
96 if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) {
97 av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
103 case ID3v2_ENCODING_ISO8859:
/* emit the code point in ch as UTF-8, one output byte per avio_w8 call */
106 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
111 case ID3v2_ENCODING_UTF16BOM:
/* the BOM itself consumes two bytes of the remaining input budget */
112 if ((left -= 2) < 0) {
113 av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n");
/* the dynamic buffer still has to be closed on the error path */
114 avio_close_dyn_buf(dynbuf, dst);
116 return AVERROR_INVALIDDATA;
/* read the BOM as a big-endian 16-bit value and dispatch on it */
118 switch (avio_rb16(pb)) {
124 av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n");
125 avio_close_dyn_buf(dynbuf, dst);
128 return AVERROR_INVALIDDATA;
132 case ID3v2_ENCODING_UTF16BE:
/* continue while a full 16-bit unit is left and no U+0000 was decoded */
133 while ((left > 1) && ch) {
/* every 16-bit unit fetched through get() costs two bytes of budget;
 * once the budget is exhausted 0 is supplied instead of reading */
134 GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;)
135 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
138 left += 2; /* did not read last char from pb */
141 case ID3v2_ENCODING_UTF8:
149 av_log(s, AV_LOG_WARNING, "Unknown encoding\n");
/* close the memory stream; its buffer is returned to the caller via dst */
155 avio_close_dyn_buf(dynbuf, dst);
/*
 * Parse a text frame and store it in s->metadata.
 * The first byte of the frame is the text encoding; the rest is decoded
 * with decode_str(). Two frame ids get special treatment:
 *  - TCON/TCO holding a number (optionally in parentheses) within the
 *    ID3v1 genre range is replaced by the name from ff_id3v1_genre_str;
 *  - TXXX/TXX carries its own key first, followed by the value.
 * NOTE(review): early returns, the frees on error paths and the
 * re-assignment of key in the TXXX branch are on lines not visible in this
 * excerpt.
 */
164 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
/* AV_DICT_DONT_OVERWRITE: an entry already present under key is kept */
167 int encoding, dict_flags = AV_DICT_DONT_OVERWRITE;
173 encoding = avio_r8(pb);
174 taglen--; /* account for encoding type byte */
/* decode_str() also reduces taglen by the bytes it consumed */
176 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
177 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
/* strcmp() returns 0 on a match, so !(a && b) is true when key equals
 * either spelling of the frame id */
181 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
182 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
183 && genre <= ID3v1_GENRE_MAX) {
/* use the genre name from the ID3v1 table instead of the number */
185 dst = ff_id3v1_genre_str[genre];
186 } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
187 /* dst now contains the key, need to get value */
189 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
190 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
/* key and value were both allocated by decode_str(): let the dictionary
 * take ownership of them instead of duplicating */
194 dict_flags |= AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_STRDUP_KEY;
/* plain text frame: only the value is heap-allocated and handed over */
197 dict_flags |= AV_DICT_DONT_STRDUP_VAL;
200 av_dict_set(&s->metadata, key, dst, dict_flags);
204 * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct.
/*
 * Read one GEOB frame from pb into a freshly allocated ID3v2ExtraMetaGEOB
 * and push it onto the front of the *extra_meta list.
 * Fields are consumed in this order: encoding byte, MIME type, file name,
 * content description, then the raw object data filling what is left of
 * taglen. On failure the frame is logged as skipped and the partially
 * filled struct is released with free_geobtag().
 * NOTE(review): allocation-failure returns, the tails of the decode_str()
 * conditions, the goto/label lines and the else branch around the data
 * copy are on lines not visible in this excerpt.
 */
206 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta)
208 ID3v2ExtraMetaGEOB *geob_data = NULL;
209 ID3v2ExtraMeta *new_extra = NULL;
/* zero-filled allocation: unset members stay NULL, which av_free accepts */
216 geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
218 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB));
222 new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
224 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta));
228 /* read encoding type byte */
229 encoding = avio_r8(pb);
232 /* read MIME type (always ISO-8859) */
233 if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, &taglen) < 0
/* file name, in the encoding announced by the frame */
238 if (decode_str(s, pb, encoding, &geob_data->file_name, &taglen) < 0
242 /* read content description */
243 if (decode_str(s, pb, encoding, &geob_data->description, &taglen) < 0
248 /* save encapsulated binary data */
/* taglen now holds only what the three strings above did not consume */
249 geob_data->data = av_malloc(taglen);
250 if (!geob_data->data) {
251 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
/* a short read is tolerated: warn and record the byte count actually read */
254 if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
255 av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n");
256 geob_data->datasize = len;
/* presumably the branch taken when no payload bytes remain -- TODO confirm */
258 geob_data->data = NULL;
259 geob_data->datasize = 0;
262 /* add data to the list */
/* the tag string is what ff_id3v2_free_extra_meta() later uses to look up
 * the matching free callback */
263 new_extra->tag = "GEOB";
264 new_extra->data = geob_data;
265 new_extra->next = *extra_meta;
266 *extra_meta = new_extra;
/* failure path: report the frame and drop whatever was decoded so far */
271 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag);
272 free_geobtag(geob_data);
/*
 * Helper for digit-only checks on a NUL-terminated string.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it reports whether the scan below stopped at the terminator.
 */
277 static int is_number(const char *str)
/* advance past the leading run of ASCII digits */
279 while (*str >= '0' && *str <= '9') str++;
/*
 * Look up tag in m (case-sensitive) and accept the entry only if its value
 * is exactly four characters long and passes is_number().
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the entry is returned on success and NULL otherwise.
 */
283 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
285 AVDictionaryEntry *t;
286 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
287 strlen(t->value) == 4 && is_number(t->value))
/*
 * Fold the separate year (TYER/TYE), day-month (TDAT/TDA) and time
 * (TIME/TIM) entries of *m into one "date" entry and remove the originals.
 * Each part is looked up under its 4-character id first and its
 * 3-character id second.
 * NOTE(review): what happens when a part is missing (the statements after
 * each lookup) is on lines not visible in this excerpt.
 */
292 static void merge_date(AVDictionary **m)
294 AVDictionaryEntry *t;
295 char date[17] = {0}; // YYYY-MM-DD hh:mm
297 if (!(t = get_date_tag(*m, "TYER")) &&
298 !(t = get_date_tag(*m, "TYE")))
/* size 5 = four year digits plus the terminating NUL */
300 av_strlcpy(date, t->value, 5);
/* setting a NULL value removes the entry from the dictionary */
301 av_dict_set(m, "TYER", NULL, 0);
302 av_dict_set(m, "TYE", NULL, 0);
304 if (!(t = get_date_tag(*m, "TDAT")) &&
305 !(t = get_date_tag(*m, "TDA")))
/* appended right after the year; the last two characters of the value are
 * written first (month position), the first two second (day position) */
307 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
308 av_dict_set(m, "TDAT", NULL, 0);
309 av_dict_set(m, "TDA", NULL, 0);
311 if (!(t = get_date_tag(*m, "TIME")) &&
312 !(t = get_date_tag(*m, "TIM")))
/* appended after "YYYY-MM-DD" (10 characters): hours, then minutes */
314 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
315 av_dict_set(m, "TIME", NULL, 0);
316 av_dict_set(m, "TIM", NULL, 0);
320 av_dict_set(m, "date", date, 0);
324 * Get the corresponding ID3v2EMFunc struct for a tag.
325 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
326 * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
/*
 * Search ff_id3v2_extra_meta_funcs for the entry whose tag id matches tag.
 * isv34 selects which id of each entry is compared: tag4 when non-zero,
 * tag3 otherwise. The scan stops at the first entry whose tag3 is NULL.
 * NOTE(review): the comparison call, the index increment and the
 * not-found return are on lines not visible in this excerpt.
 */
328 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34)
331 while (ff_id3v2_extra_meta_funcs[i].tag3) {
334 ff_id3v2_extra_meta_funcs[i].tag4 :
335 ff_id3v2_extra_meta_funcs[i].tag3),
337 return &ff_id3v2_extra_meta_funcs[i];
/*
 * Parse the frames of one ID3v2 tag whose body of len bytes starts at the
 * current position of s->pb.
 * Frames whose id starts with 'T' are handed to read_ttag(); when
 * extra_meta is non-NULL, frames with a registered handler are handed to
 * that handler; encrypted/compressed frames and everything else are
 * skipped. If the tag cannot be handled at all, the reason is logged and
 * the stream is positioned at the end of the tag.
 * NOTE(review): the version switch, several declarations, the branch
 * structure choosing between the size readers below, and the goto/label
 * lines are not visible in this excerpt.
 */
343 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta)
345 int isv34, tlen, unsync;
/* end: absolute stream position just past this tag's body */
347 int64_t next, end = avio_tell(s->pb) + len;
349 const char *reason = NULL;
352 unsigned char *buffer = NULL;
354 void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL;
359 reason = "compression";
/* bit 0x80 of the header flags: unsynchronisation applied to the whole tag */
377 unsync = flags & 0x80;
379 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
380 avio_skip(s->pb, get_size(s->pb, 4));
/* keep going while at least one more frame header fits */
382 while (len >= taghdrlen) {
383 unsigned int tflags = 0;
/* frame header with a 4-character id */
387 avio_read(s->pb, tag, 4);
/* size as a plain 32-bit big-endian value ... */
390 tlen = avio_rb32(s->pb);
/* ... or as four bytes of 7 bits each; which one applies is decided on
 * lines not visible here */
392 tlen = get_size(s->pb, 4);
393 tflags = avio_rb16(s->pb);
/* per-frame unsynchronisation flag */
394 tunsync = tflags & ID3v2_FLAG_UNSYNCH;
/* frame header with a 3-character id and a 24-bit size */
396 avio_read(s->pb, tag, 3);
398 tlen = avio_rb24(s->pb);
/* a frame may not claim more bytes than remain in the tag */
400 if (tlen < 0 || tlen > len - taghdrlen) {
401 av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
/* account for header plus payload and remember where the next frame starts */
404 len -= taghdrlen + tlen;
405 next = avio_tell(s->pb) + tlen;
409 av_log(s, AV_LOG_DEBUG, "Invalid empty frame %s, skipping.\n", tag);
413 if (tflags & ID3v2_FLAG_DATALEN) {
418 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
419 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
420 avio_skip(s->pb, tlen);
421 /* check for text tag or supported special meta tag */
/* NOTE(review): get_extra_meta_func() is documented to return NULL when no
 * entry matches, yet its result is dereferenced unconditionally here.
 * Confirm what the lookup returns for unknown tags and guard if needed. */
422 } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) {
423 if (unsync || tunsync) {
/* copy the frame into a scratch buffer with unsynchronisation undone */
425 av_fast_malloc(&buffer, &buffer_size, tlen);
427 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
/* i counts bytes consumed from the input, j bytes kept in buffer */
430 for (i = 0, j = 0; i < tlen; i++, j++) {
431 buffer[j] = avio_r8(s->pb);
/* a 0x00 directly after 0xff was inserted by unsynchronisation */
432 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
433 /* Unsynchronised byte, skip it */
/* wrap the j cleaned bytes in an in-memory context (write flag 0, no
 * callbacks) */
437 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
439 pbx = &pb; // read from sync buffer
441 pbx = s->pb; // read straight from input
445 read_ttag(s, pbx, tlen, tag);
447 /* parse special meta tag */
448 extra_func(s, pbx, tlen, tag, extra_meta);
/* NOTE(review): unlike the other messages this one has no trailing
 * newline -- confirm whether that is intended */
452 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
453 avio_skip(s->pb, tlen);
456 /* Skip to end of tag */
/* reposition to the start of the next frame regardless of how much the
 * frame reader consumed */
458 avio_seek(s->pb, next, SEEK_SET);
461 if (version == 4 && flags & 0x10) /* Footer present, always 10 bytes, skip over it */
466 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
467 avio_seek(s->pb, end, SEEK_SET);
/*
 * Read every consecutive ID3v2 tag introduced by magic from s->pb.
 * For each candidate the header is read and checked with ff_id3v2_match();
 * on a match the tag is parsed with ff_id3v2_parse(), otherwise the stream
 * is moved back to where the header read started. The loop repeats as
 * long as a header was found. Afterwards the collected metadata is passed
 * through the conversion tables and merge_date().
 * extra_meta is forwarded unchanged to ff_id3v2_parse().
 * NOTE(review): the if/else structure and the handling of a short header
 * read are on lines not visible in this excerpt.
 */
472 void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta)
475 uint8_t buf[ID3v2_HEADER_SIZE];
480 /* save the current offset in case there's nothing to read/skip */
481 off = avio_tell(s->pb);
482 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
/* fewer bytes than a full header were available */
483 if (ret != ID3v2_HEADER_SIZE)
485 found_header = ff_id3v2_match(buf, magic);
487 /* parse ID3v2 header */
/* tag body length: 7 bits from each size byte, most significant first */
488 len = ((buf[6] & 0x7f) << 21) |
489 ((buf[7] & 0x7f) << 14) |
490 ((buf[8] & 0x7f) << 7) |
/* buf[3] is passed as the version, buf[5] as the header flags */
492 ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta);
/* rewind to the offset saved before the header read */
494 avio_seek(s->pb, off, SEEK_SET);
496 } while (found_header);
/* apply the conversion tables for 4-character ids, 3-character ids and the
 * 2.4-only ids, then build the combined date entry */
497 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
498 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv);
499 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
500 merge_date(&s->metadata);
/*
 * Read ID3v2 tags introduced by magic without collecting extra metadata:
 * forwards to ff_id3v2_read_all() with a NULL extra_meta list.
 */
503 void ff_id3v2_read(AVFormatContext *s, const char *magic)
505 ff_id3v2_read_all(s, magic, NULL);
/*
 * Release a list of extra metadata starting at *extra_meta.
 * For each node the free callback registered for its tag is looked up and
 * applied to the node's data.
 * NOTE(review): the loop header and the freeing/advancing of the nodes
 * themselves are on lines not visible in this excerpt.
 */
508 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
510 ID3v2ExtraMeta *current = *extra_meta, *next;
511 void (*free_func)(ID3v2ExtraMeta*);
/* isv34 = 1: node tags are matched against the 4-character ids.
 * NOTE(review): the lookup result is dereferenced without a NULL check
 * although get_extra_meta_func() is documented to return NULL when
 * nothing matches -- confirm every stored tag has a table entry. */
514 if ((free_func = get_extra_meta_func(current->tag, 1)->free))
515 free_func(current->data);
/* fetch the successor before the current node goes away */
516 next = current->next;
/*
 * Handlers for non-text frames, searched by get_extra_meta_func().
 * The visible entry lists a 3-character id, a 4-character id, a reader
 * function and a free function for GEOB frames.
 * NOTE(review): the terminating entry (get_extra_meta_func() stops at a
 * NULL tag3) is on a line not visible in this excerpt.
 */
522 const ID3v2EMFunc ff_id3v2_extra_meta_funcs[] = {
523 { "GEO", "GEOB", read_geobtag, free_geobtag },
/*
 * Conversion table pairing 4-character frame ids with generic metadata
 * key names; applied by ff_id3v2_read_all() through ff_metadata_conv().
 * NOTE(review): further entries and the terminator are on lines not
 * visible in this excerpt.
 */
527 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
529 { "TCOM", "composer"},
531 { "TCOP", "copyright"},
532 { "TENC", "encoded_by"},
534 { "TLAN", "language"},
536 { "TPE2", "album_artist"},
537 { "TPE3", "performer"},
539 { "TPUB", "publisher"},
541 { "TSSE", "encoder"},
/*
 * Conversion table for a second group of 4-character frame ids (all of the
 * visible ids also appear in ff_id3v2_4_tags); applied by
 * ff_id3v2_read_all() through ff_metadata_conv().
 * NOTE(review): further entries and the terminator are on lines not
 * visible in this excerpt.
 */
545 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
548 { "TDEN", "creation_time"},
549 { "TSOA", "album-sort"},
550 { "TSOP", "artist-sort"},
551 { "TSOT", "title-sort"},
/*
 * Conversion table pairing 3-character frame ids with generic metadata
 * key names; applied by ff_id3v2_read_all() through ff_metadata_conv().
 * NOTE(review): further entries and the terminator are on lines not
 * visible in this excerpt.
 */
555 const AVMetadataConv ff_id3v2_2_metadata_conv[] = {
559 { "TEN", "encoded_by"},
561 { "TP2", "album_artist"},
562 { "TP3", "performer"},
/*
 * List of 4-character text frame ids.
 * Each element is exactly 4 bytes wide, so the ids are stored WITHOUT a
 * terminating NUL; compare them with a length-limited function, never
 * with strcmp().
 * NOTE(review): remaining entries and any terminator are on lines not
 * visible in this excerpt.
 */
568 const char ff_id3v2_tags[][4] = {
569 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT",
570 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED",
571 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3",
572 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE",
/*
 * Second list of 4-character text frame ids (by its name, presumably those
 * specific to ID3v2.4 -- TODO confirm).
 * Each element is exactly 4 bytes wide, so the ids are stored WITHOUT a
 * terminating NUL.
 * NOTE(review): any terminator entry is on a line not visible in this
 * excerpt.
 */
576 const char ff_id3v2_4_tags[][4] = {
577 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
578 "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
582 const char ff_id3v2_3_tags[][4] = {
583 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",