2 * Copyright (c) 2003 Fabrice Bellard
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * Specifications available at:
26 * http://id3.org/Developer_Information
31 #include "libavutil/avstring.h"
32 #include "libavutil/intreadwrite.h"
33 #include "libavutil/dict.h"
34 #include "avio_internal.h"
/*
 * Report whether buf looks like an ID3v2 tag header for the given magic.
 * Visible conditions: the first byte equals magic[0] and bytes 6..8 have
 * their top bit clear (ff_id3v2_tag_len() takes only 7 bits from each).
 * NOTE(review): the embedded numbering jumps 38 -> 43 and stops at 45, so
 * further conditions are missing from this listing - confirm against the
 * complete file.
 */
36 int ff_id3v2_match(const uint8_t *buf, const char * magic)
38 return buf[0] == magic[0] &&
/* Size bytes must not use bit 7. */
43 (buf[6] & 0x80) == 0 &&
44 (buf[7] & 0x80) == 0 &&
45 (buf[8] & 0x80) == 0 &&
/*
 * Compute a tag length from the header bytes in buf: the low 7 bits of
 * bytes 6, 7 and 8 are combined at shifts 21, 14 and 7, and
 * ID3v2_HEADER_SIZE is added for the header itself.
 * NOTE(review): original lines 54-56 and everything after 57 are missing
 * from this listing (the final term of the sum and the return are not
 * visible) - confirm against the complete file.
 */
49 int ff_id3v2_tag_len(const uint8_t * buf)
51 int len = ((buf[6] & 0x7f) << 21) +
52 ((buf[7] & 0x7f) << 14) +
53 ((buf[8] & 0x7f) << 7) +
/* The encoded size excludes the header, so account for it here. */
57 len += ID3v2_HEADER_SIZE;
/*
 * Read an integer from s, taking 7 payload bits from each input byte.
 * NOTE(review): only the accumulation step is visible; presumably it is
 * executed once per byte for len bytes and v is returned - confirm.
 */
61 static unsigned int get_size(AVIOContext *s, int len)
/* Shift in the low 7 bits of the next byte; its top bit is discarded. */
65 v = (v << 7) + (avio_r8(s) & 0x7F);
70 * Free GEOB type extra metadata.
/*
 * Release a GEOB payload. obj is treated as an ID3v2ExtraMetaGEOB; the
 * three decoded strings are freed here.
 * NOTE(review): the lines after 77 are missing from this listing;
 * presumably the binary data and the struct itself are freed there -
 * confirm.
 */
72 static void free_geobtag(void *obj)
74 ID3v2ExtraMetaGEOB *geob = obj;
75 av_free(geob->mime_type);
76 av_free(geob->file_name);
77 av_free(geob->description);
83 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
84 * always null terminated.
86 * @param dst Pointer where the address of the buffer with the decoded bytes is
87 * stored. Buffer must be freed by caller.
88 * @param dstlen Pointer to an int where the length of the decoded string
89 * is stored (in bytes, incl. null termination)
90 * @param maxread Pointer to maximum number of characters to read from the
91 * AVIOContext. After execution the value is decremented by the number of bytes
93 * @param seeknull If true, decoding stops after the first U+0000 character found, if
94 * there is any before maxread is reached
95 * @returns 0 if no error occurred, dst is uninitialized on error
/*
 * Read a string from pb in the given ID3v2 encoding and re-encode it as
 * UTF-8 into a dynamic buffer that is handed back through *dst.
 * NOTE(review): many lines of this function are missing from the listing
 * (declarations of left/ch/tmp/len, the BOM cases, the UTF-8 loop body, the
 * final bookkeeping); `left` is presumably initialised from *maxread -
 * confirm against the complete file.
 */
97 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding,
98 uint8_t **dst, int *dstlen, int *maxread, const int seeknull)
/* Default 16-bit reader is big-endian. NOTE(review): presumably replaced in
 * the BOM switch below for little-endian input - that code is not visible. */
104 unsigned int (*get)(AVIOContext*) = avio_rb16;
/* The decoded bytes are accumulated in an in-memory AVIOContext. */
107 if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) {
108 av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
114 case ID3v2_ENCODING_ISO8859:
/* Continue while input budget remains and, if seeknull is set, the
 * previously decoded character was not NUL. */
115 while (left && (!seeknull || ch)) {
/* Emit ch as UTF-8, one output byte at a time. */
117 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
122 case ID3v2_ENCODING_UTF16BOM:
/* The BOM occupies two bytes; fail if the budget cannot cover it. */
123 if ((left -= 2) < 0) {
124 av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n");
/* Close the dynamic buffer before bailing out. */
125 avio_close_dyn_buf(dynbuf, (uint8_t **)dst);
127 return AVERROR_INVALIDDATA;
/* The BOM is read as a big-endian 16-bit value and matched below. */
129 switch (avio_rb16(pb)) {
135 av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n");
136 avio_close_dyn_buf(dynbuf, (uint8_t **)dst);
139 return AVERROR_INVALIDDATA;
143 case ID3v2_ENCODING_UTF16BE:
/* A UTF-16 code unit needs two bytes, hence left > 1. */
144 while ((left > 1) && (!seeknull || ch)) {
/* Each 16-bit unit fetched charges 2 bytes to the budget; once the budget
 * is exhausted the macro is fed 0 instead of reading further. */
145 GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;)
146 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
149 left += 2; /* did not read last char from pb */
152 case ID3v2_ENCODING_UTF8:
153 while (left && (!seeknull || ch)) {
160 av_log(s, AV_LOG_WARNING, "Unknown encoding\n");
/* Finalise the buffer: ownership passes to *dst, len receives its size. */
166 len = avio_close_dyn_buf(dynbuf, (uint8_t **)dst);
/*
 * Parse one text frame from pb and store it in s->metadata under key.
 * The first payload byte selects the text encoding; the remainder is decoded
 * with decode_str(). Genre frames (TCON/TCO) holding a number are mapped to
 * the ID3v1 genre name; user-text frames (TXXX/TXX) are split into their two
 * strings.
 * NOTE(review): several lines (declarations, the TXXX key handling, cleanup)
 * are missing from this listing.
 */
178 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
181 const char *val = NULL;
188 taglen--; /* account for encoding type byte */
/* seeknull is 0: the whole remaining frame is decoded, embedded NULs included. */
190 if (decode_str(s, pb, avio_r8(pb), &dst, &dstlen, &taglen, 0) < 0) {
191 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
/* strcmp() returns 0 on a match, so !(a && b) is true when key equals either name. */
195 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
/* Accept both "(NN)" and plain "NN" genre notations. */
196 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
/* NOTE(review): genre's declaration is not visible; if it is a signed int a
 * negative parsed value would pass this check and index before the table -
 * confirm its type. */
197 && genre <= ID3v1_GENRE_MAX)
198 val = ff_id3v1_genre_str[genre];
199 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
200 /* dst now contains two 0-terminated strings */
/* Value starts after the first string; FFMIN keeps the pointer inside dst. */
204 val = dst + FFMIN(len + 1, dstlen);
/* An entry already present under key is kept (DONT_OVERWRITE). */
210 av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE);
216 * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct.
/*
 * Read a GEOB frame from pb into a newly allocated ID3v2ExtraMetaGEOB and
 * push it onto the front of the *extra_meta list.
 * Visible layout of the frame: encoding byte, MIME type, file name,
 * description, then taglen remaining bytes of binary data.
 * NOTE(review): the allocation checks, the continuation of the three
 * decode_str() conditions, the else/fail labels and returns are missing from
 * this listing.
 */
218 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta)
220 ID3v2ExtraMetaGEOB *geob_data = NULL;
221 ID3v2ExtraMeta *new_extra = NULL;
/* Zero-initialised so that unset members are NULL/0. */
228 geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
230 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB));
234 new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
236 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta));
240 /* read encoding type byte */
241 encoding = avio_r8(pb);
244 /* read MIME type (always ISO-8859) */
/* seeknull is 1 for all three strings: each stops at its terminating NUL,
 * and decode_str() is given &taglen to track the bytes still unread. */
245 if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, NULL, &taglen, 1) < 0
/* File name uses the encoding announced by the frame. */
250 if (decode_str(s, pb, encoding, &geob_data->file_name, NULL, &taglen, 1) < 0
254 /* read content description */
255 if (decode_str(s, pb, encoding, &geob_data->description, NULL, &taglen, 1) < 0
260 /* save encapsulated binary data */
261 geob_data->data = av_malloc(taglen);
262 if (!geob_data->data) {
263 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
/* A short read is only warned about; the bytes actually read are kept.
 * NOTE(review): if avio_read() returns a negative error code it would be
 * stored as datasize - confirm whether that can happen here. */
266 if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
267 av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n");
268 geob_data->datasize = len;
270 geob_data->data = NULL;
271 geob_data->datasize = 0;
274 /* add data to the list */
/* tag points at a string literal; the new node becomes the list head. */
275 new_extra->tag = "GEOB";
276 new_extra->data = geob_data;
277 new_extra->next = *extra_meta;
278 *extra_meta = new_extra;
/* Failure path: report the frame and release the partially filled struct. */
283 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag);
284 free_geobtag(geob_data);
/*
 * Helper used by get_date_tag() to validate a numeric string.
 * NOTE(review): only the digit-skipping loop is visible; the return
 * expression (presumably "reached the terminating NUL") is missing from this
 * listing - confirm.
 */
289 static int is_number(const char *str)
/* Advance past every leading ASCII decimal digit. */
291 while (*str >= '0' && *str <= '9') str++;
/*
 * Look up tag in m (case-sensitive) and accept it only if its value is
 * exactly four characters long and passes is_number().
 * NOTE(review): the return statements are missing from this listing;
 * callers treat a false (NULL) result as "tag absent or malformed".
 */
295 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
297 AVDictionaryEntry *t;
298 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
299 strlen(t->value) == 4 && is_number(t->value))
/*
 * Combine the separate year / day-month / time tags (both 4-character and
 * 3-character tag names) into a single "date" entry and drop the source
 * tags. Each part is only used when get_date_tag() validates it as four
 * digits.
 * NOTE(review): the statements taken when a part is absent (original lines
 * 311, 318, 325) and any labels are missing from this listing.
 */
304 static void merge_date(AVDictionary **m)
306 AVDictionaryEntry *t;
/* 16 characters plus the terminating NUL; zero-filled. */
307 char date[17] = {0}; // YYYY-MM-DD hh:mm
309 if (!(t = get_date_tag(*m, "TYER")) &&
310 !(t = get_date_tag(*m, "TYE")))
/* Size 5 copies the four year digits plus NUL. */
312 av_strlcpy(date, t->value, 5);
/* av_dict_set() with a NULL value deletes the entry (per libavutil docs). */
313 av_dict_set(m, "TYER", NULL, 0);
314 av_dict_set(m, "TYE", NULL, 0);
316 if (!(t = get_date_tag(*m, "TDAT")) &&
317 !(t = get_date_tag(*m, "TDA")))
/* Characters 2-3 of the value are written first, then characters 0-1, so a
 * DDMM value becomes "-MM-DD" right after the year. */
319 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
320 av_dict_set(m, "TDAT", NULL, 0);
321 av_dict_set(m, "TDA", NULL, 0);
323 if (!(t = get_date_tag(*m, "TIME")) &&
324 !(t = get_date_tag(*m, "TIM")))
/* Appended at offset 10, directly after "YYYY-MM-DD": " hh:mm". */
326 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
327 av_dict_set(m, "TIME", NULL, 0);
328 av_dict_set(m, "TIM", NULL, 0);
/* Flags 0: an existing "date" entry is replaced. */
332 av_dict_set(m, "date", date, 0);
336 * Get the corresponding ID3v2EMFunc struct for a tag.
337 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
338 * @return A pointer to the matching ID3v2EMFunc struct if found; otherwise a pointer to the table's terminating entry (all members NULL).
/*
 * Scan ff_id3v2_extra_meta_funcs for an entry matching tag, comparing
 * against the tag4 member when isv34 is set and tag3 otherwise. The scan
 * stops at the entry whose tag3 is NULL.
 * Both visible returns yield the address of a table entry, and callers in
 * this file dereference the result without a NULL check.
 * NOTE(review): the comparison call and the loop increment are missing from
 * this listing.
 */
340 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34)
343 while (ff_id3v2_extra_meta_funcs[i].tag3) {
346 ff_id3v2_extra_meta_funcs[i].tag4 :
347 ff_id3v2_extra_meta_funcs[i].tag3),
/* Match found. */
349 return &ff_id3v2_extra_meta_funcs[i];
/* NOTE(review): appears to be the fall-through after the loop, i.e. the
 * terminating entry whose read/free members are NULL - confirm. */
352 return &ff_id3v2_extra_meta_funcs[i];
/*
 * Walk the frames of one ID3v2 tag body of len bytes read from s->pb.
 * Text frames (id starting with 'T') go to read_ttag(); frames with a
 * registered handler go to that handler when extra_meta is non-NULL;
 * everything else is skipped. version and flags come from the tag header.
 * NOTE(review): large parts of this function (declarations, the version
 * switch, loop exits, buffer cleanup, labels) are missing from this listing;
 * the comments below describe only the visible lines.
 */
355 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta)
/* end is the absolute offset just past the tag body; used on the error path. */
360 int64_t next, end = avio_tell(s->pb) + len;
362 const char *reason = NULL;
365 unsigned char *buffer = NULL;
367 void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL;
372 reason = "compression";
/* Header flag bit 7 is taken as tag-wide unsynchronisation. */
390 unsync = flags & 0x80;
392 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
393 avio_skip(s->pb, get_size(s->pb, 4));
/* Keep going while at least one more frame header fits in the remainder. */
395 while (len >= taghdrlen) {
396 unsigned int tflags = 0;
/* Four-character frame id. */
400 avio_read(s->pb, tag, 4);
/* Frame size read as a plain 32-bit big-endian integer... */
403 tlen = avio_rb32(s->pb);
/* ...or as a 7-bits-per-byte integer (the selecting condition is not visible). */
405 tlen = get_size(s->pb, 4);
406 tflags = avio_rb16(s->pb);
407 tunsync = tflags & ID3v2_FLAG_UNSYNCH;
/* Three-character frame id with a 24-bit size. */
409 avio_read(s->pb, tag, 3);
411 tlen = avio_rb24(s->pb);
/* Reject empty frames and sizes above 2^28. */
413 if (tlen > (1<<28) || !tlen)
415 len -= taghdrlen + tlen;
/* Absolute offset of the following frame, independent of what the handler reads. */
420 next = avio_tell(s->pb) + tlen;
422 if (tflags & ID3v2_FLAG_DATALEN) {
429 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
430 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
431 avio_skip(s->pb, tlen);
432 /* check for text tag or supported special meta tag */
/* extra_func is only looked up when the caller supplied extra_meta. */
433 } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) {
434 if (unsync || tunsync) {
/* Reusable scratch buffer, grown on demand. */
436 av_fast_malloc(&buffer, &buffer_size, tlen);
438 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
/* Copy the frame while dropping the 0x00 that follows a 0xFF. */
441 for (i = 0, j = 0; i < tlen; i++, j++) {
442 buffer[j] = avio_r8(s->pb);
443 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
444 /* Unsynchronised byte, skip it */
/* In-memory context over the j bytes kept in buffer (write flag 0). */
448 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
450 pbx = &pb; // read from sync buffer
452 pbx = s->pb; // read straight from input
456 read_ttag(s, pbx, tlen, tag);
458 /* parse special meta tag */
459 extra_func(s, pbx, tlen, tag, extra_meta);
/* NOTE(review): unlike the other av_log() calls in this file, this message
 * has no trailing newline. */
463 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
464 avio_skip(s->pb, tlen);
467 /* Skip to end of tag */
469 avio_seek(s->pb, next, SEEK_SET);
472 if (version == 4 && flags & 0x10) /* Footer present, always 10 bytes, skip over it */
/* Unsupported tag: report the reason and jump past the whole tag body. */
477 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
478 avio_seek(s->pb, end, SEEK_SET);
/*
 * Read every consecutive ID3v2 tag at the current position of s->pb.
 * For each header that ff_id3v2_match() accepts, the body is handed to
 * ff_id3v2_parse(); the loop repeats while a header was found. Afterwards
 * the collected metadata keys are converted with the three conversion tables
 * and merge_date() is applied.
 * NOTE(review): the opening of the do-loop, the if/else around the parse
 * call and the short-read handling are missing from this listing.
 */
483 void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta)
486 uint8_t buf[ID3v2_HEADER_SIZE];
491 /* save the current offset in case there's nothing to read/skip */
492 off = avio_tell(s->pb);
493 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
494 if (ret != ID3v2_HEADER_SIZE)
496 found_header = ff_id3v2_match(buf, magic);
498 /* parse ID3v2 header */
/* Body length assembled from 7-bit groups of the size bytes. */
499 len = ((buf[6] & 0x7f) << 21) |
500 ((buf[7] & 0x7f) << 14) |
501 ((buf[8] & 0x7f) << 7) |
/* buf[3] is passed as the version and buf[5] as the header flags. */
503 ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta);
/* Rewind to where this iteration started reading. */
505 avio_seek(s->pb, off, SEEK_SET);
507 } while (found_header);
508 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
509 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv);
510 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
511 merge_date(&s->metadata);
/*
 * Convenience wrapper around ff_id3v2_read_all() that passes NULL for
 * extra_meta; with a NULL list ff_id3v2_parse() does not look up or call
 * any extra-metadata handler, so only text frames are stored.
 */
514 void ff_id3v2_read(AVFormatContext *s, const char *magic)
516 ff_id3v2_read_all(s, magic, NULL);
/*
 * Release the payloads of the list starting at *extra_meta, using the free
 * callback registered for each node's tag.
 * NOTE(review): the loop header, the freeing of the node itself and the
 * advance to `next` are missing from this listing.
 */
519 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
521 ID3v2ExtraMeta *current = *extra_meta, *next;
522 void (*free_func)(void *);
/* isv34 is 1: the lookup compares against the four-character tag names,
 * which is the form read_geobtag() stores ("GEOB"). */
525 if ((free_func = get_extra_meta_func(current->tag, 1)->free))
526 free_func(current->data);
/* Fetch the successor before the current node goes away. */
527 next = current->next;
/*
 * Handler table for non-text frames. Initializer order, as used by
 * get_extra_meta_func(): 3-character tag, 4-character tag, read callback,
 * free callback. The all-NULL entry terminates the table.
 */
533 const ID3v2EMFunc ff_id3v2_extra_meta_funcs[] = {
534 { "GEO", "GEOB", read_geobtag, free_geobtag },
535 { NULL, NULL, NULL, NULL }
/*
 * Key conversion table passed to ff_metadata_conv() by ff_id3v2_read_all():
 * four-character frame id -> generic metadata key.
 * NOTE(review): several entries and the terminator are missing from this
 * listing.
 */
538 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
540 { "TCOM", "composer"},
542 { "TCOP", "copyright"},
543 { "TENC", "encoded_by"},
545 { "TLAN", "language"},
547 { "TPE2", "album_artist"},
548 { "TPE3", "performer"},
550 { "TPUB", "publisher"},
552 { "TSSE", "encoder"},
/*
 * Additional key conversion table applied by ff_id3v2_read_all() after the
 * other two tables.
 * NOTE(review): some entries and the terminator are missing from this
 * listing.
 */
556 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
559 { "TDEN", "creation_time"},
560 { "TSOA", "album-sort"},
561 { "TSOP", "artist-sort"},
562 { "TSOT", "title-sort"},
/*
 * Key conversion table for three-character frame ids, passed to
 * ff_metadata_conv() by ff_id3v2_read_all().
 * NOTE(review): several entries and the terminator are missing from this
 * listing.
 */
566 const AVMetadataConv ff_id3v2_2_metadata_conv[] = {
570 { "TEN", "encoded_by"},
572 { "TP2", "album_artist"},
573 { "TP3", "performer"},
/*
 * List of four-character text frame ids. Each element is a char[4]
 * initialised from a 4-character literal, so there is no room for a
 * terminating NUL: entries must be compared with a length-limited routine
 * (memcmp/strncmp with length 4), never strcmp().
 * NOTE(review): the end of the initializer is missing from this listing.
 */
579 const char ff_id3v2_tags[][4] = {
580 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT",
581 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED",
582 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3",
583 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE",
/*
 * Second list of four-character text frame ids (array name suggests they
 * are specific to ID3v2.4 - not established by the visible code). Elements
 * are char[4] without a terminating NUL; compare with length 4.
 * NOTE(review): the end of the initializer is missing from this listing.
 */
587 const char ff_id3v2_4_tags[][4] = {
588 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
589 "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
593 const char ff_id3v2_3_tags[][4] = {
594 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",